In [None]:
# Imports and configuration
import os, sqlite3, warnings
from datetime import timedelta
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt # type: ignore
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.svm import SVR
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.model_selection import TimeSeriesSplit
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Paths. The defaults are the original hard-coded locations; set the
# WEATHER_CSV_PATH / WEATHER_OUTDIR environment variables to point the
# notebook elsewhere without editing this cell.
CSV_PATH = os.environ.get(
    "WEATHER_CSV_PATH",
    r"C:\Users\hp\OneDrive\Desktop\Weather Prediction Model\weather_data.csv",
)
OUTDIR = os.environ.get("WEATHER_OUTDIR", "/mnt/data")
# Every generated artefact is written underneath OUTDIR.
DB_PATH = os.path.join(OUTDIR, "weather_weather.db")
PRED_CSV_PATH = os.path.join(OUTDIR, "weather_predictions.csv")

print('CSV_PATH =', CSV_PATH)
print('Outputs will be saved to:', OUTDIR)

In [None]:
# Read the input CSV, stopping early with a clear message when it is absent
csv_is_present = os.path.exists(CSV_PATH)
if not csv_is_present:
    raise FileNotFoundError(f'CSV not found at {CSV_PATH}. Upload your CSV or change CSV_PATH.')

df = pd.read_csv(CSV_PATH)
print('Loaded CSV with shape:', df.shape)
# Rich preview of the first rows
preview_rows = df.head()
display(preview_rows)

# helper to detect columns
def find_date_column(df):
    """Return the label of the column that most plausibly holds dates.

    Detection runs in two passes:

    1. Return the first column whose name (case-insensitive, surrounding
       whitespace ignored) is one of ``date``, ``datetime``, ``day`` or
       ``timestamp``.
    2. Otherwise return the first non-numeric column whose values can all be
       parsed by ``pd.to_datetime`` and that yields at least one real
       timestamp.

    Numeric columns are skipped in the second pass because ``pd.to_datetime``
    accepts plain numbers (reading them as epoch offsets), which would
    otherwise make a temperature or rainfall column look like a date column.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose columns are inspected; it is not modified.

    Returns
    -------
    object or None
        The matching column label, or ``None`` when no column qualifies.
    """
    known_names = ('date', 'datetime', 'day', 'timestamp')
    for col in df.columns:
        # str() keeps non-string labels (e.g. integers) from raising here.
        if str(col).strip().lower() in known_names:
            return col
    for col in df.columns:
        try:
            candidate = df[col]
            if pd.api.types.is_numeric_dtype(candidate):
                continue
            # An all-empty column parses to all-NaT; that is not a date column.
            has_dates = bool(pd.to_datetime(candidate).notna().any())
        except Exception:
            # Anything that cannot be parsed is simply not a date column.
            continue
        if has_dates:
            return col
    return None

def find_temp_and_rain_columns(df):
    """Locate the temperature and rainfall columns by name.

    Column labels are compared case-insensitively by substring: the first
    label containing ``temp`` is the temperature column, and the first label
    containing ``rain`` or ``precip`` is the rainfall column.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose column labels are inspected; it is not modified.

    Returns
    -------
    tuple
        ``(temp_col, rain_col)``; either element is ``None`` when no label
        matched.
    """
    temp_col = None
    rain_col = None
    for col in df.columns:
        # str() so that non-string labels (e.g. integer headers) are tolerated.
        lname = str(col).lower()
        # 'temp' also covers 'temperature', so a single substring test suffices.
        if temp_col is None and 'temp' in lname:
            temp_col = col
        if rain_col is None and ('rain' in lname or 'precip' in lname):
            rain_col = col
        if temp_col is not None and rain_col is not None:
            break
    return temp_col, rain_col

# Resolve the date column, parse it, and put the rows in chronological order.
date_col = find_date_column(df)
if date_col is not None:
    df[date_col] = pd.to_datetime(df[date_col], errors='coerce')
    # Unparseable dates become NaT and would be sorted to the end of the frame,
    # where the later df.tail(7) analysis would mistake them for the newest rows.
    n_bad_dates = int(df[date_col].isna().sum())
    if n_bad_dates:
        print(f'Dropping {n_bad_dates} row(s) whose {date_col!r} value is missing or not a valid date')
        df = df.dropna(subset=[date_col])
    # mergesort is stable: rows sharing a timestamp keep their file order.
    df = df.sort_values(by=date_col, kind='mergesort').reset_index(drop=True)
else:
    # No usable date column: fabricate one calendar day per row, starting at
    # 1970-01-01 and following the file order. Building the values directly
    # (instead of renaming the output of reset_index()) also works when the
    # CSV already contains a column called 'index'.
    date_col = 'synthetic_index'
    df = df.reset_index(drop=True)
    df.insert(0, date_col, pd.to_datetime(np.arange(len(df)), unit='D', origin='1970-01-01'))

# Detect the temperature / rainfall columns and coerce them to numeric values.
temp_col, rain_col = find_temp_and_rain_columns(df)
if temp_col is None or rain_col is None:
    # map(str, ...) so that non-string column labels cannot turn this
    # diagnostic into a TypeError that hides the real problem.
    raise ValueError('Could not detect temp or rain columns. Ensure names include "temp" and "rain"/"precip".\nFound columns: ' + ', '.join(map(str, df.columns)))

# Non-numeric entries become NaN rather than aborting the run.
df[temp_col] = pd.to_numeric(df[temp_col], errors='coerce')
df[rain_col] = pd.to_numeric(df[rain_col], errors='coerce')
# how='all': only rows missing BOTH measurements are discarded; rows with a
# single missing value are kept.
df = df.dropna(subset=[temp_col, rain_col], how='all').reset_index(drop=True)

print('Using date_col =', date_col, ', temp_col =', temp_col, ', rain_col =', rain_col)
print('\nLast 5 rows:')
display(df.tail())

# Save raw table to SQLite DB (replace)
os.makedirs(OUTDIR, exist_ok=True)
df_for_db = df.copy()
# Observed rows are flagged predicted = 0 unless the CSV already has that column.
if 'predicted' not in df_for_db.columns:
    df_for_db['predicted'] = 0
conn = sqlite3.connect(DB_PATH)
try:
    # if_exists='replace' drops any previous 'weather' table first.
    df_for_db.to_sql('weather', conn, if_exists='replace', index=False)
    conn.commit()
finally:
    # Always release the database file handle, even when the write fails.
    conn.close()
print('Saved raw data to DB at', DB_PATH)

In [None]:
# Pattern analysis (last 7 days) and feature engineering
# NOTE: "last 7 days" means the final 7 rows of df; this equals 7 calendar
# days only when the data holds exactly one row per day.
last7 = df.tail(7).copy()
print('Pattern analysis (last 7 days):')
temps = last7[temp_col].values
rains = last7[rain_col].values

# Work on the non-missing readings so that NaN endpoints, an all-NaN window,
# or an empty frame neither crash nor produce a misleading result.
valid_temps = temps[~np.isnan(temps)]
valid_rains = rains[~np.isnan(rains)]
nan = float('nan')

print(' - temp_mean:', float(valid_temps.mean()) if valid_temps.size else nan)
print(' - temp_median:', float(np.median(valid_temps)) if valid_temps.size else nan)
print(' - temp_std:', float(valid_temps.std()) if valid_temps.size else nan)

# The trend compares the first and last *valid* readings; with fewer than two
# of them there is nothing to compare.
if valid_temps.size >= 2:
    first_temp, last_temp = valid_temps[0], valid_temps[-1]
    if last_temp > first_temp:
        temp_trend = 'increasing'
    elif last_temp < first_temp:
        temp_trend = 'decreasing'
    else:
        temp_trend = 'flat'
else:
    temp_trend = 'unknown'
print(' - temp_trend:', temp_trend)

print(' - rain_total:', float(valid_rains.sum()))
print(' - rain_days:', int((valid_rains > 0).sum()))

# One line chart per measurement over the last-7 window
for series_col, series_label in ((temp_col, 'Temperature'), (rain_col, 'Rainfall')):
    plt.figure(figsize=(8, 3))
    plt.plot(last7[date_col], last7[series_col], marker='o')
    plt.title(f'{series_label} - last 7 days')
    plt.xlabel('Date')
    plt.ylabel(series_label)
    plt.tight_layout()
    plt.show()

# Feature engineering: lagged readings plus a trailing 3-row mean.
# Every feature is shifted so it only uses rows before the one being predicted.
nlags = 3
df_feat = df.loc[:, [date_col, temp_col, rain_col]].set_index(date_col)
feature_sources = (('temp', temp_col), ('rain', rain_col))

for lag in range(1, nlags + 1):
    for prefix, source_col in feature_sources:
        df_feat[f'{prefix}_lag_{lag}'] = df_feat[source_col].shift(lag)

for prefix, source_col in feature_sources:
    trailing_mean = df_feat[source_col].rolling(window=3, min_periods=1).mean()
    # shift(1) excludes the current row from its own rolling mean
    df_feat[f'{prefix}_roll_3'] = trailing_mean.shift(1)

# Rows with any missing value (including the leading rows without full lags) are dropped
df_clean = df_feat.dropna().copy()
print('\nAfter feature creation, rows available for modelling:', len(df_clean))
display(df_clean.head())