In [1]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
import datetime

# 1. LOAD DATA
# Read the cleaned master CSV, then replace its 'date' column with parsed
# pandas datetimes so it can be grouped and compared as real dates.
print("Loading Master Data...")
df = pd.read_csv(
    '/Users/singhs/anaconda_projects/UIDAI/master_uidai_data_cleaned.csv'
)
df = df.assign(date=pd.to_datetime(df['date']))

# 2. PREPARE TRAINING DATA
# Keep only the 'Enrolment' rows and total their counts for each distinct date.
# NOTE(review): the totals are "per month" only if the CSV holds one date per
# month; the grouping key here is the raw 'date' value -- confirm.
print("Training AI Model...")
is_enrolment = df['Type'] == 'Enrolment'
totals_by_date = df[is_enrolment].groupby('date')['Count'].sum()
data = totals_by_date.reset_index()

# The regression needs a numeric time axis, so express every date as its
# ordinal day number.
data['date_ordinal'] = data['date'].apply(lambda stamp: stamp.toordinal())

# X is a one-column frame (time); y is the series of enrolment totals.
X = data.loc[:, ['date_ordinal']]
y = data.loc[:, 'Count']

# 3. TRAIN MODEL
# Fit a straight line of enrolment total against ordinal day.
model = LinearRegression().fit(X, y)

# 4. PREDICT FUTURE (Next 6 Months)
# Step forward by calendar months from the last observed date. A month offset
# keeps the same day-of-month (clamped to the month's last day when needed),
# whereas fixed 30-day steps drift away from month boundaries.
last_date = data['date'].max()
future_dates = [last_date + pd.DateOffset(months=i) for i in range(1, 7)]
future_ordinal = np.array([d.toordinal() for d in future_dates]).reshape(-1, 1)

# The model was fitted on a DataFrame with a 'date_ordinal' column, so predict
# with the same column name; passing the bare array makes scikit-learn warn
# that X has no valid feature names.
predictions = model.predict(
    pd.DataFrame(future_ordinal, columns=['date_ordinal'])
)

# 5. SAVE RESULTS
# One row per forecast date, tagged so it can be told apart from history.
forecast_df = pd.DataFrame({
    'Date': future_dates,
    # Round to the nearest whole count; a plain int cast truncates toward zero.
    'Predicted_Count': np.rint(predictions).astype(int),
    'Type': 'AI_Forecast'
})

# Combine with past data for a complete picture.
# The forecast's 'Date' column is renamed to 'date' for the merge so actual and
# predicted rows share a single date column; concatenating mismatched names
# would produce two half-empty date columns. Historical rows have no
# 'Predicted_Count'/'Type', and forecast rows have no 'Actual_Count'.
final_forecast = pd.concat(
    [
        data[['date', 'Count']].rename(columns={'Count': 'Actual_Count'}),
        forecast_df.rename(columns={'Date': 'date'}),
    ],
    ignore_index=True,  # avoid duplicate index labels in the combined frame
)
final_forecast.to_csv('forecast_data.csv', index=False)

# The feature is an ordinal day number, so the slope is a change per day.
print("\n--- AI REPORT ---")
print(f"Model Slope: {model.coef_[0]:.2f} (This means enrolments are changing by this amount per day)")
print("Forecast generated! Saved to 'forecast_data.csv'.")

Loading Master Data...
Training AI Model...

--- AI REPORT ---
Model Slope: -43.51 (This means enrolments are changing by this amount per day)
Forecast generated! Saved to 'forecast_data.csv'.


