In [118]:
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import Pipeline
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
import pickle
import os

In [119]:
# Years to process; each one is handled as a separate series.
years = list(range(2020, 2025))

# City coordinates as (name, latitude, longitude), grouped by country.
# City names are not unique on their own ("hyderabad" occurs in two
# countries), so the country is always carried along with the name.
_cities_by_country = {
  "pakistan": [
    ("karachi", 24.8607, 67.0011),
    ("lahore", 31.5204, 74.3587),
    ("islamabad", 33.6844, 73.0479),
    ("rawalpindi", 33.5651, 73.0169),
    ("peshawar", 34.0151, 71.5249),
    ("quetta", 30.1798, 66.9750),
    ("multan", 30.1575, 71.5249),
    ("hyderabad", 25.3960, 68.3578),
  ],
  "india": [
    ("mumbai", 19.0760, 72.8777),
    ("delhi", 28.7041, 77.1025),
    ("kolkata", 22.5726, 88.3639),
    ("chennai", 13.0827, 80.2707),
    ("bengaluru", 12.9716, 77.5946),
    ("hyderabad", 17.3850, 78.4867),
    ("ahmedabad", 23.0225, 72.5714),
    ("pune", 18.5204, 73.8567),
    ("surat", 21.1702, 72.8311),
    ("jaipur", 26.9124, 75.7873),
    ("lucknow", 26.8467, 80.9462),
    ("patna", 25.5941, 85.1376),
  ],
  "bangladesh": [
    ("dhaka", 23.8103, 90.4125),
    ("chittagong", 22.3569, 91.7832),
  ],
  "sri lanka": [
    ("colombo", 6.9271, 79.8612),
    ("kandy", 7.2906, 80.6337),
  ],
  "nepal": [
    ("kathmandu", 27.7172, 85.3240),
  ],
}

# Flattened to (city, country, latitude, longitude) tuples, keeping the
# country order above and the city order within each country.
regions = [
  (city_name, country_name, latitude, longitude)
  for country_name, city_entries in _cities_by_country.items()
  for city_name, latitude, longitude in city_entries
]

In [120]:
# Load the master table, discard every row that has a missing value, and
# add a 'year' column holding the first four characters of each 'date'
# entry (so 'year' is a string, not an integer).
df = (
  pd.read_csv('master.csv')
  .dropna()
  .assign(year=lambda frame: frame['date'].map(lambda stamp: stamp[:4]))
)

# Names of the columns to model. Selecting them from `df` first means a
# column missing from the file fails here with a KeyError.
targets = df[['PRECTOTCORR', 'PS', 'QV2M', 'T2M', 'U10M', 'V10M']].columns.tolist()
print(targets)

['PRECTOTCORR', 'PS', 'QV2M', 'T2M', 'U10M', 'V10M']


In [121]:
def get_data(lat, lon, year, target, data=None, coord_tol=1e-6):
  """Select one location/year series and return it as (X, y) arrays.

  Rows are kept when their 'latitude' and 'longitude' match `lat` and
  `lon` within `coord_tol` and their 'year' value equals `year`.

  Args:
    lat: Latitude of the location to select.
    lon: Longitude of the location to select.
    year: Value compared against the 'year' column with `==`; it must have
      the same type as that column's entries.
    target: Name of the column whose values become `y`.
    data: DataFrame to read from. Defaults to the notebook-level `df`.
    coord_tol: Absolute tolerance used when matching coordinates.

  Returns:
    A tuple `(X, y)`. `X` is an integer array of shape `(n, 1)` holding
    0..n-1 in the row order of the selection (a positional time index).
    `y` holds the `target` values of the selected rows. Both are empty
    (n == 0) when nothing matches.
  """
  frame = df if data is None else data

  # Coordinates read from a text file may differ from the literals used by
  # the caller in the last bits, so compare with a tolerance instead of ==.
  lat_match = np.isclose(frame['latitude'].to_numpy(dtype=float), lat,
                         rtol=0.0, atol=coord_tol)
  lon_match = np.isclose(frame['longitude'].to_numpy(dtype=float), lon,
                         rtol=0.0, atol=coord_tol)
  year_match = (frame['year'] == year).to_numpy()

  subset = frame[lat_match & lon_match & year_match]

  # The regressor is the row position within the selection, not a date;
  # this relies on the rows already being in chronological order.
  X = np.arange(len(subset)).reshape(-1, 1)
  y = subset[target].values

  return X, y

# Fit one quadratic trend (value versus positional time index) for every
# (city, year, target) series and collect the fitted parameters.
POLY_DEGREE = 2

# Maps (city, country, year, target) -> {"coefficients": [...], "intercept": float}.
results = {}

for (city, country, lat, lon) in regions:
  for year in years:
    for target in targets:
      # The 'year' column holds strings, hence the conversion of the int.
      X, y = get_data(lat, lon, str(year), target)

      if len(y) <= 1:
        # Too few samples to fit anything. Store an all-zero placeholder of
        # the same length as a fitted coefficient vector: the degree-2
        # expansion of the single time-index feature has POLY_DEGREE + 1
        # columns (1, x, x^2), so the model has that many coefficients.
        coeffs, intercept = [0.0] * (POLY_DEGREE + 1), 0.0
      else:
        poly = PolynomialFeatures(degree=POLY_DEGREE)
        X_poly = poly.fit_transform(X)
        model = LinearRegression()
        model.fit(X_poly, y)

        coeffs = model.coef_.tolist()
        intercept = float(model.intercept_)

      # Recorded for every combination, including the placeholder case, so
      # that each (city, country, year, target) key is present in the output.
      results[(city, country, year, target)] = {
        "coefficients": coeffs,
        "intercept": intercept
      }

# Serialize the whole results mapping to disk. Pickle is used because the
# mapping is keyed by tuples.
file_path = "regression_results.pickle"
with open(file_path, mode="wb") as out_handle:
  out_handle.write(pickle.dumps(results))