In [29]:
import pandas as pd
import numpy as np
import pickle
from sklearn.linear_model import LinearRegression

In [30]:
# Read the derived dataset, parse the 'date' column into datetimes, and put
# the rows in chronological order with a fresh 0..n-1 index.
# NOTE(review): this is an absolute, machine-specific path; the notebook will
# not run on another machine until the location is made configurable.
df = (
    pd.read_csv("C:/Users/aashi/OneDrive/Desktop/Hackathon/Ingenious_7_0_SheCodes/data/derived_data.csv")
    .assign(date=lambda frame: pd.to_datetime(frame['date']))
    .sort_values('date')
    .reset_index(drop=True)
)

In [31]:
# Deserialize the previously trained model from the working directory.
# NOTE(review): pickle.load can execute arbitrary code from the file, so this
# file must come from a trusted source.
with open("health_risk_model.pkl", "rb") as model_file:
    health_model = pickle.load(model_file)

# Column order handed to the model. It has to stay identical to the order
# used when the model was trained.
FEATURES = ['temperature', 'heat_index', 'humidity', 'rainfall', 'aqi']

In [32]:
def forecast_next_7_days(series, horizon=7):
    """Extrapolate a straight-line trend fitted to ``series``.

    Missing values are dropped, the remaining observations are regressed on
    their positions ``0..n-1`` with ordinary least squares, and the fitted
    line is evaluated at the next ``horizon`` positions.

    Because the regressor is the position after dropping missing values, gaps
    in the data are not reflected in the spacing of the fitted points.

    Parameters
    ----------
    series : pandas.Series
        Numeric observations, in the order they should be fitted.
    horizon : int, default 7
        Number of future steps to predict.

    Returns
    -------
    numpy.ndarray
        ``horizon`` predicted values, one per future step.

    Raises
    ------
    ValueError
        If ``horizon`` is smaller than 1, or if ``series`` contains no
        non-missing values.
    """
    if horizon < 1:
        raise ValueError(f"horizon must be at least 1, got {horizon}")

    observed = series.dropna().reset_index(drop=True)
    n_obs = len(observed)
    if n_obs == 0:
        # Fail here with a clear message instead of deep inside the estimator.
        raise ValueError("cannot forecast: series has no non-missing values")

    # The estimator expects a 2-D design matrix, hence the single-column reshape.
    history_X = np.arange(n_obs).reshape(-1, 1)
    model = LinearRegression()
    model.fit(history_X, observed.values)

    # Future positions continue directly after the last observed position.
    future_X = np.arange(n_obs, n_obs + horizon).reshape(-1, 1)
    return model.predict(future_X)

In [33]:
# Fit one linear trend per input variable and keep each forecast under the
# name the later cells use.
aqi_f, temp_f, humidity_f, rainfall_f = (
    forecast_next_7_days(df[column])
    for column in ('aqi', 'temperature', 'humidity', 'rainfall')
)

In [34]:
def heat_index(temp, humidity):
    """Return a heat index computed from temperature and humidity.

    The value is ``-8.784695 + 1.61139411*temp + 2.338549*humidity
    - 0.14611605*temp*humidity``: a constant, one linear term per input, and
    one interaction term. The arithmetic is elementwise, so scalars and
    equally shaped NumPy arrays both work.

    NOTE(review): the coefficients look like the leading terms of the
    Celsius / percent-humidity heat-index regression, without its quadratic
    terms. Confirm the units and that the ``heat_index`` column used to train
    the model was derived with this same formula.
    """
    temp_term = 1.61139411 * temp
    humidity_term = 2.338549 * humidity
    interaction_term = 0.14611605 * temp * humidity
    return -8.784695 + temp_term + humidity_term - interaction_term

# Heat index for each forecast step, from the forecast temperature and humidity.
heat_index_f = heat_index(temp=temp_f, humidity=humidity_f)

In [36]:
# Build the model input for the forecast steps, one column per feature, and
# select the columns in FEATURES order in the same expression.
future_input = pd.DataFrame(
    dict(
        temperature=temp_f,
        heat_index=heat_index_f,
        humidity=humidity_f,
        rainfall=rainfall_f,
        aqi=aqi_f,
    )
)[FEATURES]


In [37]:
# Score every forecast row with the loaded model. future_input's columns were
# selected in FEATURES order when it was built.
health_score_f = health_model.predict(future_input)

In [38]:
# One calendar day per forecast value, starting the day after the last
# observed date. The number of periods is taken from the predictions instead
# of a repeated literal, so the dates always line up with the forecast arrays.
future_dates = pd.date_range(
    start=df['date'].max() + pd.Timedelta(days=1),
    periods=len(health_score_f),
    freq='D',
)

# Collect the forecasts and the predicted score into one table keyed by date.
forecast_df = pd.DataFrame({
    'date': future_dates,
    'predicted_aqi': aqi_f,
    'predicted_temperature': temp_f,
    'predicted_rainfall': rainfall_f,
    'predicted_health_risk_score': health_score_f
})

In [39]:
def risk_level(score, low_max=40, high_min=70):
    """Map a numeric risk score to a categorical label.

    Parameters
    ----------
    score : float
        Risk score to classify.
    low_max : float, default 40
        Scores strictly below this value are "Low".
    high_min : float, default 70
        Scores at or above this value are "High"; scores from ``low_max`` up
        to (but excluding) ``high_min`` are "Medium".

    Returns
    -------
    str
        "Low", "Medium" or "High", or "Unknown" when ``score`` is NaN.

    NOTE(review): the output recorded in this notebook shows scores near 0.6,
    far below the default cut-offs. Confirm the scale of the model's score and
    pass matching thresholds if it is not 0-100.
    """
    # NaN compares False with everything, so without this guard a missing
    # score would fall through both comparisons and be labelled "High".
    if score != score:
        return "Unknown"
    if score < low_max:
        return "Low"
    if score < high_min:
        return "Medium"
    return "High"

# Label every forecast day with its risk band, then print the full table.
forecast_df['risk_level'] = forecast_df['predicted_health_risk_score'].map(risk_level)

print(forecast_df)

        date  predicted_aqi  predicted_temperature  predicted_rainfall  \
0 2026-01-17     200.347966              26.956659            2.834925   
1 2026-01-18     200.347716              26.956504            2.835170   
2 2026-01-19     200.347465              26.956349            2.835416   
3 2026-01-20     200.347214              26.956195            2.835661   
4 2026-01-21     200.346964              26.956040            2.835906   
5 2026-01-22     200.346713              26.955885            2.836152   
6 2026-01-23     200.346462              26.955730            2.836397   

   predicted_health_risk_score risk_level  
0                     0.599114        Low  
1                     0.599114        Low  
2                     0.599114        Low  
3                     0.599114        Low  
4                     0.599114        Low  
5                     0.599114        Low  
6                     0.599114        Low  
