In [36]:
import pandas as pd
import numpy as np
import json
from sklearn.linear_model import LinearRegression

In [37]:
# Years iterated over by the per-year analysis (2020 through 2024 inclusive).
years = list(range(2020, 2025))

# Locations as (city, country, latitude, longitude) tuples.
# "hyderabad" occurs in both Pakistan and India, so the city name alone is
# not a unique key; pair it with the country or the coordinates.
regions = [
  # Pakistan
  ("karachi", "pakistan", 24.8607, 67.0011),
  ("lahore", "pakistan", 31.5204, 74.3587),
  ("islamabad", "pakistan", 33.6844, 73.0479),
  ("rawalpindi", "pakistan", 33.5651, 73.0169),
  ("peshawar", "pakistan", 34.0151, 71.5249),
  ("quetta", "pakistan", 30.1798, 66.9750),
  ("multan", "pakistan", 30.1575, 71.5249),
  ("hyderabad", "pakistan", 25.3960, 68.3578),
  # India
  ("mumbai", "india", 19.0760, 72.8777),
  ("delhi", "india", 28.7041, 77.1025),
  ("kolkata", "india", 22.5726, 88.3639),
  ("chennai", "india", 13.0827, 80.2707),
  ("bengaluru", "india", 12.9716, 77.5946),
  ("hyderabad", "india", 17.3850, 78.4867),
  ("ahmedabad", "india", 23.0225, 72.5714),
  ("pune", "india", 18.5204, 73.8567),
  ("surat", "india", 21.1702, 72.8311),
  ("jaipur", "india", 26.9124, 75.7873),
  ("lucknow", "india", 26.8467, 80.9462),
  ("patna", "india", 25.5941, 85.1376),
  # Bangladesh
  ("dhaka", "bangladesh", 23.8103, 90.4125),
  ("chittagong", "bangladesh", 22.3569, 91.7832),
  # Sri Lanka
  ("colombo", "sri lanka", 6.9271, 79.8612),
  ("kandy", "sri lanka", 7.2906, 80.6337),
  # Nepal
  ("kathmandu", "nepal", 27.7172, 85.3240),
]

In [42]:
# Read the master table, discard every row that contains a missing value,
# and derive 'year' from the first four characters of the 'date' column.
df = (
  pd.read_csv('master.csv')
  .dropna()
  .assign(year=lambda frame: frame['date'].map(lambda stamp: stamp[:4]))
)

# Names of the columns to model. Selecting them from df first means a
# missing column raises a KeyError here rather than later in the analysis.
targets = df[['PRECTOTCORR', 'PS', 'QV2M', 'T2M', 'U10M', 'V10M']].columns.tolist()

In [46]:
def get_data(lat, lon, year, target, data=None):
  """Return the regression inputs for one location, year and target column.

  Parameters
  ----------
  lat, lon : float
      Coordinates, matched by exact equality against the 'latitude' and
      'longitude' columns.
  year : str or int
      Year to select. It is converted to match the dtype of the 'year'
      column, so 2020 and "2020" select the same rows.
  target : str
      Name of the column returned as ``y``.
  data : pandas.DataFrame, optional
      Table to query. Defaults to the notebook-level ``df``.

  Returns
  -------
  X : numpy.ndarray of shape (n_rows, 1)
      Row positions 0..n_rows-1 within the selection, used as the time index.
  y : numpy.ndarray
      Values of ``target`` for the selected rows.

  When nothing matches, X has shape (0, 1) and y is empty; None is never
  returned.
  """
  if data is None:
    data = df

  # Comparing an int against a column of strings (or the reverse) is False
  # for every row and would silently select nothing, so the requested year
  # is normalised to the column's own type first.
  if pd.api.types.is_numeric_dtype(data['year']):
    year_key = int(year)
  else:
    year_key = str(year)

  # NOTE(review): latitude/longitude use exact float equality, which only
  # matches when the table stores exactly the requested values.
  selected = ((data['latitude'] == lat) &
              (data['longitude'] == lon) &
              (data['year'] == year_key))
  subset = data[selected]

  # X = position of each row within the selection (0, 1, 2, ...)
  X = np.arange(len(subset)).reshape(-1, 1)

  # y = target values for that subset
  y = subset[target].values

  return X, y

# Fit one straight line per (region, year, target) combination and collect
# the coefficients as plain dicts so the whole list can be dumped to JSON.
json_results = []

for city, country, lat, lon in regions:
  for year in years:
    for target in targets:
      # The 'year' column of df holds strings, hence the str() conversion.
      X, y = get_data(lat, lon, str(year), target)

      # get_data always returns arrays (never None), so the sample count is
      # the only guard needed: a line cannot be fitted to fewer than two
      # points, and such combinations are recorded with null coefficients.
      if len(y) < 2:
        slope, intercept = None, None
      else:
        model = LinearRegression().fit(X, y)
        # Convert NumPy scalars to built-in floats so json can serialise them.
        slope = float(model.coef_[0])
        intercept = float(model.intercept_)

      json_results.append({
        "city": city,
        "country": country,
        "latitude": lat,
        "longitude": lon,
        "year": year,
        "target": target,
        "slope": slope,
        "intercept": intercept
      })

# save to file
with open("regression_results.json", "w") as f:
  json.dump(json_results, f, indent=2)

In [47]:
# Sanity check: one record per (region, year, target) combination.
# json_results is used because `results` is not defined in this notebook.
len(json_results)

750