# Putnam Predictions

I'm cooked for this won't lie
so my strategy is going to be a more analytical one: classify putnam problems and try to predict whats likely to show up on the exam this year
I plan on writing the exam the next couple years so this is V0.01

Tasks:
Download: Exams from 1984-2023 
          Solutions from 1995-2023
Optional: Questions & Solutions from "The William Lowell Putnam Mathematical Competition. Problems and Solutions: 1965-1984, edited by Alexanderson, Klosinski, and Larson""

Classification Run using Llama
Figure out how to ssh into https://cloud.lambdalabs.com/ to run LLAMA 3.1 405B 
train something like KNN to categorize these into problem or solution types (possibly solution)

Then train some Neural Network on making predictions based on these groupings


## Data Preprocessing

### Data Download

In [24]:
import requests
import os
import time
from requests.exceptions import RequestException

def download_file(url, filename):
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Accept-Language': 'en-US,en;q=0.5',
        'Accept-Encoding': 'gzip, deflate, br',
        'Connection': 'keep-alive',
        'Upgrade-Insecure-Requests': '1',
        'Sec-Fetch-Dest': 'document',
        'Sec-Fetch-Mode': 'navigate',
        'Sec-Fetch-Site': 'none',
        'Sec-Fetch-User': '?1',
        'Cache-Control': 'max-age=0',
    }
    
    try:
        response = requests.get(url, headers=headers, timeout=10, allow_redirects=True)
        response.raise_for_status()
        
        # Check if the response is a redirection
        if response.history:
            print(f"Request was redirected for {filename}")
            for resp in response.history:
                print(f"Redirect: {resp.status_code} - {resp.url}")
            print(f"Final destination: {response.status_code} - {response.url}")
        
        with open(filename, 'wb') as file:
            file.write(response.content)
        print(f"Downloaded: {filename}")
        return True
    except RequestException as e:
        print(f"Failed to download {filename}: {str(e)}")
        return False

def scrape_putnam_archive():
    base_url = "https://kskedlaya.org/putnam-archive/"
    
    if not os.path.exists('putnam_files'):
        os.makedirs('putnam_files')

    successful_downloads = 0
    failed_downloads = 0

    for year in range(1985, 2024):
        # Try to download problem file
        problem_url = f"{base_url}{year}.tex"
        problem_filename = f"putnam_files/{year}.tex"
        if download_file(problem_url, problem_filename):
            successful_downloads += 1
        else:
            failed_downloads += 1

        if year >= 1995: 
            # Try to download solution file
            solution_url = f"{base_url}{year}s.tex"
            solution_filename = f"putnam_files/{year}s.tex"
            if download_file(solution_url, solution_filename):
                successful_downloads += 1
            else:
                failed_downloads += 1

        time.sleep(2)  # Delay between requests

    print(f"\nDownload summary:")
    print(f"Successful downloads: {successful_downloads}")
    print(f"Failed downloads: {failed_downloads}")

In [22]:
scrape_putnam_archive()

Downloaded: putnam_files/1985.tex
Failed to download putnam_files/1985s.tex: 404 Client Error: Not Found for url: https://kskedlaya.org/putnam-archive/1985s.tex
Downloaded: putnam_files/1986.tex
Failed to download putnam_files/1986s.tex: 404 Client Error: Not Found for url: https://kskedlaya.org/putnam-archive/1986s.tex
Downloaded: putnam_files/1987.tex
Failed to download putnam_files/1987s.tex: 404 Client Error: Not Found for url: https://kskedlaya.org/putnam-archive/1987s.tex
Downloaded: putnam_files/1988.tex
Failed to download putnam_files/1988s.tex: 404 Client Error: Not Found for url: https://kskedlaya.org/putnam-archive/1988s.tex
Downloaded: putnam_files/1989.tex
Failed to download putnam_files/1989s.tex: 404 Client Error: Not Found for url: https://kskedlaya.org/putnam-archive/1989s.tex
Downloaded: putnam_files/1990.tex
Failed to download putnam_files/1990s.tex: 404 Client Error: Not Found for url: https://kskedlaya.org/putnam-archive/1990s.tex
Downloaded: putnam_files/1991.tex


### Data Processing - for classification

In [38]:
import json
import os
import re

def parse_tex_file(file_path):
    with open(file_path, 'r', encoding='utf-8') as file:
        content = file.read()
    # This is a basic parser and may need refinement based on the exact TeX structure
    problems = re.split(r'\n\\item', content)
    return [prob.strip() for prob in problems if prob.strip()]

def create_problem_solution_pairs():
    if not os.path.exists('putnam_problems'):
        os.makedirs('putnam_problems')

    for year in range(1985, 2024):
        problem_file = f'putnam_files/{year}.tex'
        solution_file = f'putnam_files/{year}s.tex'
        
        if os.path.exists(problem_file):
            problems = parse_tex_file(problem_file)
            solutions = parse_tex_file(solution_file) if os.path.exists(solution_file) else []
            
            for i, problem in enumerate(problems):
                question_number = f"{'A' if i < 6 else 'B'}{(i % 6) + 1}"
                
                data = {
                    "year": year,
                    "question_number": question_number,
                    "question": problem,
                    "solution": solutions[i] if i < len(solutions) else None
                }
                
                filename = f'putnam_problems/putnam_{year}_{question_number}.json'
                save_to_json(data, filename)
        else:
            print(f"Problem file for year {year} not found.")

def save_to_json(data, filename):
    with open(filename, 'w', encoding='utf-8') as f:
        json.dump(data, f, ensure_ascii=False, indent=2)
    print(f"Saved: {filename}")


In [42]:
create_problem_solution_pairs()

Saved: putnam_problems/putnam_1985_A1.json
Saved: putnam_problems/putnam_1985_A2.json
Saved: putnam_problems/putnam_1985_A3.json
Saved: putnam_problems/putnam_1985_A4.json
Saved: putnam_problems/putnam_1985_A5.json
Saved: putnam_problems/putnam_1985_A6.json
Saved: putnam_problems/putnam_1985_B1.json
Saved: putnam_problems/putnam_1985_B2.json
Saved: putnam_problems/putnam_1985_B3.json
Saved: putnam_problems/putnam_1985_B4.json
Saved: putnam_problems/putnam_1985_B5.json
Saved: putnam_problems/putnam_1985_B6.json
Saved: putnam_problems/putnam_1985_B1.json
Saved: putnam_problems/putnam_1985_B2.json
Saved: putnam_problems/putnam_1985_B3.json
Saved: putnam_problems/putnam_1985_B4.json
Saved: putnam_problems/putnam_1985_B5.json
Saved: putnam_problems/putnam_1986_A1.json
Saved: putnam_problems/putnam_1986_A2.json
Saved: putnam_problems/putnam_1986_A3.json
Saved: putnam_problems/putnam_1986_A4.json
Saved: putnam_problems/putnam_1986_A5.json
Saved: putnam_problems/putnam_1986_A6.json
Saved: putn