In [2]:
import pandas as pd
import numpy as np
from statsmodels.tsa.holtwinters import ExponentialSmoothing
from xgboost import XGBRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error
from sklearn.experimental import enable_iterative_imputer 
from sklearn.impute import IterativeImputer

# Load dataset
df=pd.read_csv(r'C:\Users\ASUS\Desktop\Climate1\AQI.csv')  # Update with your file path

# Convert date column to datetime format
df['date'] = pd.to_datetime(df['date'])

# Count unique days per city
city_data_counts = df.groupby('City')['date'].nunique()

# Categorize cities
cities_with_2_years_data = city_data_counts[city_data_counts > 730].index.tolist()
cities_with_less_than_2_years_data = city_data_counts[city_data_counts <= 730].index.tolist()

# Store prediction results
prediction_results = []

for city in df['City'].unique():
    df_city = df[df['City'] == city].copy()
    df_city.set_index('date', inplace=True)
    df_city = df_city.asfreq('D')
    
    # Fill missing values using linear interpolation
    df_city['Index Value'] = df_city['Index Value'].interpolate(method='linear')
    
    # Use Iterative Imputer for consecutive missing values
    imputer = IterativeImputer(max_iter=10, random_state=42)
    df_city[['Index Value']] = imputer.fit_transform(df_city[['Index Value']])
    
    # Define features and target
    df_city['day_of_year'] = df_city.index.dayofyear
    df_city['year'] = df_city.index.year
    X = df_city[['day_of_year', 'year']]
    y = df_city['Index Value']
    
    # Split data for training/testing
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)
    
    if city in cities_with_2_years_data:
        # Apply Holt-Winters
        seasonal_periods = min(365, len(df_city) // 2)
        model_hw = ExponentialSmoothing(y_train, trend='add', seasonal='add', seasonal_periods=seasonal_periods).fit()
        hw_pred = model_hw.forecast(len(X_test))
        X_train['hw_prediction'] = model_hw.fittedvalues
        X_test['hw_prediction'] = hw_pred
    
    # Train XGBoost model
    model_xgb = XGBRegressor(objective='reg:squarederror', n_estimators=50, learning_rate=0.1, max_depth=3)
    model_xgb.fit(X_train, y_train)
    xgb_pred = model_xgb.predict(X_test)
    
    # Final prediction
    final_prediction = xgb_pred if city in cities_with_less_than_2_years_data else (0.5 * xgb_pred + 0.5 * hw_pred)
    
    # Compute confidence
    mae = mean_absolute_error(y_test, final_prediction)
    confidence = max(0, 100 - (mae / np.mean(y_test)) * 100)
    
    # Store results
    prediction_results.append({
        "City": city,
        "Predicted AQI": round(final_prediction[-1], 2),
        "Prediction Confidence": round(confidence, 2)
    })

# Output predictions
for result in prediction_results:
    print(result)


  "Predicted AQI": round(final_prediction[-1], 2),
  "Predicted AQI": round(final_prediction[-1], 2),
  "Predicted AQI": round(final_prediction[-1], 2),
  "Predicted AQI": round(final_prediction[-1], 2),
  "Predicted AQI": round(final_prediction[-1], 2),
  "Predicted AQI": round(final_prediction[-1], 2),
  "Predicted AQI": round(final_prediction[-1], 2),
  "Predicted AQI": round(final_prediction[-1], 2),
  "Predicted AQI": round(final_prediction[-1], 2),
  "Predicted AQI": round(final_prediction[-1], 2),
  "Predicted AQI": round(final_prediction[-1], 2),


ValueError: cannot reindex on an axis with duplicate labels

In [3]:
import pandas as pd
import numpy as np
from statsmodels.tsa.holtwinters import ExponentialSmoothing
from xgboost import XGBRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error
from sklearn.experimental import enable_iterative_imputer  # Enable IterativeImputer
from sklearn.impute import IterativeImputer

# Load dataset
  # Update with your file path

# Convert date column to datetime format
df['date'] = pd.to_datetime(df['date'])

# Remove duplicate rows based on 'City' and 'date'
df = df.drop_duplicates(subset=['City', 'date'])

# Count unique days per city
city_data_counts = df.groupby('City')['date'].nunique()

# Categorize cities
cities_with_2_years_data = city_data_counts[city_data_counts > 730].index.tolist()
cities_with_less_than_2_years_data = city_data_counts[city_data_counts <= 730].index.tolist()

# Store prediction results
prediction_results = []

for city in df['City'].unique():
    df_city = df[df['City'] == city].copy()
    df_city.set_index('date', inplace=True)
    df_city = df_city[~df_city.index.duplicated(keep='first')]
    df_city = df_city.asfreq('D')
    
    # Fill missing values using linear interpolation
    df_city['Index Value'] = df_city['Index Value'].interpolate(method='linear')
    
    # Use Iterative Imputer for consecutive missing values
    imputer = IterativeImputer(max_iter=10, random_state=42)
    df_city[['Index Value']] = imputer.fit_transform(df_city[['Index Value']])
    
    # Define features and target
    df_city['day_of_year'] = df_city.index.dayofyear
    df_city['year'] = df_city.index.year
    X = df_city[['day_of_year', 'year']]
    y = df_city['Index Value']
    
    # Split data for training/testing
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)
    
    if city in cities_with_2_years_data:
        # Apply Holt-Winters
        seasonal_periods = min(365, len(df_city) // 2)
        model_hw = ExponentialSmoothing(y_train, trend='add', seasonal='add', seasonal_periods=seasonal_periods).fit()
        hw_pred = model_hw.forecast(len(X_test))
        X_train['hw_prediction'] = model_hw.fittedvalues
        X_test['hw_prediction'] = hw_pred
    
    # Train XGBoost model
    model_xgb = XGBRegressor(objective='reg:squarederror', n_estimators=50, learning_rate=0.1, max_depth=3)
    model_xgb.fit(X_train, y_train)
    xgb_pred = model_xgb.predict(X_test)
    
    # Final prediction
    final_prediction = xgb_pred if city in cities_with_less_than_2_years_data else (0.5 * xgb_pred + 0.5 * hw_pred)
    
    # Compute confidence
    mae = mean_absolute_error(y_test, final_prediction)
    confidence = max(0, 100 - (mae / np.mean(y_test)) * 100)
    
    # Store results
    prediction_results.append({
        "City": city,
        "Predicted AQI": round(final_prediction[-1], 2),
        "Prediction Confidence": round(confidence, 2)
    })

# Output predictions
for result in prediction_results:
    print(result)


  "Predicted AQI": round(final_prediction[-1], 2),
  "Predicted AQI": round(final_prediction[-1], 2),
  "Predicted AQI": round(final_prediction[-1], 2),
  "Predicted AQI": round(final_prediction[-1], 2),
  "Predicted AQI": round(final_prediction[-1], 2),
  "Predicted AQI": round(final_prediction[-1], 2),
  "Predicted AQI": round(final_prediction[-1], 2),
  "Predicted AQI": round(final_prediction[-1], 2),
  "Predicted AQI": round(final_prediction[-1], 2),
  "Predicted AQI": round(final_prediction[-1], 2),
  "Predicted AQI": round(final_prediction[-1], 2),
  "Predicted AQI": round(final_prediction[-1], 2),
  "Predicted AQI": round(final_prediction[-1], 2),
  "Predicted AQI": round(final_prediction[-1], 2),
  "Predicted AQI": round(final_prediction[-1], 2),
  "Predicted AQI": round(final_prediction[-1], 2),
  "Predicted AQI": round(final_prediction[-1], 2),
  "Predicted AQI": round(final_prediction[-1], 2),
  "Predicted AQI": round(final_prediction[-1], 2),
  "Predicted AQI": round(final_

{'City': 'Chennai', 'Predicted AQI': -48.31, 'Prediction Confidence': 0}
{'City': 'Varanasi', 'Predicted AQI': 666.87, 'Prediction Confidence': 0}
{'City': 'Hyderabad', 'Predicted AQI': 153.58, 'Prediction Confidence': 68.88}
{'City': 'Agra', 'Predicted AQI': 712.28, 'Prediction Confidence': 0}
{'City': 'Pune', 'Predicted AQI': 449.72, 'Prediction Confidence': 0}
{'City': 'Mumbai', 'Predicted AQI': 169.42, 'Prediction Confidence': 69.26}
{'City': 'Patna', 'Predicted AQI': -250.31, 'Prediction Confidence': 0}
{'City': 'Jodhpur', 'Predicted AQI': 336.38, 'Prediction Confidence': 8.33}
{'City': 'Gaya', 'Predicted AQI': -175.54, 'Prediction Confidence': 0}
{'City': 'Nagpur', 'Predicted AQI': 299.45, 'Prediction Confidence': 20.55}
{'City': 'Visakhapatnam', 'Predicted AQI': 147.31, 'Prediction Confidence': 64.99}
{'City': 'Amritsar', 'Predicted AQI': 273.4, 'Prediction Confidence': 46.54}
{'City': 'Thiruvananthapuram', 'Predicted AQI': 23.35, 'Prediction Confidence': 51.7}
{'City': 'Ajmer',

In [5]:
import pandas as pd
import numpy as np
from statsmodels.tsa.holtwinters import ExponentialSmoothing
from xgboost import XGBRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error
from sklearn.experimental import enable_iterative_imputer  # Enable IterativeImputer
from sklearn.impute import IterativeImputer

# Load dataset
  # Update with your file path

# Convert date column to datetime format
df['date'] = pd.to_datetime(df['date'])

# Remove duplicate rows based on 'City' and 'date'
df = df.drop_duplicates(subset=['City', 'date'])

# Count unique days per city
city_data_counts = df.groupby('City')['date'].nunique()

# Categorize cities
cities_with_2_years_data = city_data_counts[city_data_counts > 730].index.tolist()
cities_with_less_than_2_years_data = city_data_counts[city_data_counts <= 730].index.tolist()

def predict_aqi(city, future_date):
    future_date = pd.to_datetime(future_date)
    if city not in df['City'].unique():
        return {"Error": "City not found in dataset"}
    
    df_city = df[df['City'] == city].copy()
    df_city.set_index('date', inplace=True)
    df_city = df_city[~df_city.index.duplicated(keep='first')]
    df_city = df_city.asfreq('D')
    
    # Fill missing values using linear interpolation
    df_city['Index Value'] = df_city['Index Value'].interpolate(method='linear')
    
    # Use Iterative Imputer for consecutive missing values
    imputer = IterativeImputer(max_iter=10, random_state=42)
    df_city[['Index Value']] = imputer.fit_transform(df_city[['Index Value']])
    
    # Define features and target
    df_city['day_of_year'] = df_city.index.dayofyear
    df_city['year'] = df_city.index.year
    X = df_city[['day_of_year', 'year']]
    y = df_city['Index Value']
    
    # Split data for training/testing
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)
    
    if city in cities_with_2_years_data:
        # Apply Holt-Winters
        seasonal_periods = min(365, len(df_city) // 2)
        model_hw = ExponentialSmoothing(y_train, trend='add', seasonal='add', seasonal_periods=seasonal_periods).fit()
        hw_pred = model_hw.forecast(steps=1).iloc[0]
    
    # Train XGBoost model
    model_xgb = XGBRegressor(objective='reg:squarederror', n_estimators=50, learning_rate=0.1, max_depth=3)
    model_xgb.fit(X_train, y_train)
    
    future_features = pd.DataFrame({'day_of_year': [future_date.dayofyear], 'year': [future_date.year]})
    xgb_pred = model_xgb.predict(future_features)[0]
    
    # Final prediction
    final_prediction = xgb_pred if city in cities_with_less_than_2_years_data else (0.5 * xgb_pred + 0.5 * hw_pred)
    
    # Compute confidence
    mae = mean_absolute_error(y_test, model_xgb.predict(X_test))
    confidence = max(0, 100 - (mae / np.mean(y_test)) * 100)
    
    return {
        "City": city,
        "Future Date": future_date.strftime('%Y-%m-%d'),
        "Predicted AQI": round(final_prediction, 2),
        "Prediction Confidence": round(confidence, 2)
    }

# Example usage
city_input = input("Enter city name: ")
date_input = input("Enter future date (YYYY-MM-DD): ")
prediction = predict_aqi(city_input, date_input)
print(prediction)

{'City': 'Agra', 'Future Date': '2025-02-25', 'Predicted AQI': 129.84, 'Prediction Confidence': 29.63}


In [12]:
import pandas as pd
import numpy as np
import pickle
from statsmodels.tsa.holtwinters import ExponentialSmoothing
from xgboost import XGBRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error
from sklearn.experimental import enable_iterative_imputer  # Enable IterativeImputer
from sklearn.impute import IterativeImputer

# Load dataset
  # Update with your file path

# Convert date column to datetime format
df['date'] = pd.to_datetime(df['date'])

# Remove duplicate rows based on 'City' and 'date'
df = df.drop_duplicates(subset=['City', 'date'])

# Count unique days per city
city_data_counts = df.groupby('City')['date'].nunique()

# Categorize cities
cities_with_2_years_data = city_data_counts[city_data_counts > 730].index.tolist()
cities_with_less_than_2_years_data = city_data_counts[city_data_counts <= 730].index.tolist()

# Dictionary to store trained models
models = {}

for city in df['City'].unique():
    df_city = df[df['City'] == city].copy()
    df_city.set_index('date', inplace=True)
    df_city = df_city[~df_city.index.duplicated(keep='first')]
    df_city = df_city.asfreq('D')
    
    # Fill missing values using linear interpolation
    df_city['Index Value'] = df_city['Index Value'].interpolate(method='linear')
    
    # Use Iterative Imputer for consecutive missing values
    imputer = IterativeImputer(max_iter=10, random_state=42)
    df_city[['Index Value']] = imputer.fit_transform(df_city[['Index Value']])
    
    # Define features and target
    df_city['day_of_year'] = df_city.index.dayofyear
    df_city['year'] = df_city.index.year
    X = df_city[['day_of_year', 'year']]
    y = df_city['Index Value']
    
    # Split data for training/testing
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)
    
    model_xgb = XGBRegressor(objective='reg:squarederror', n_estimators=50, learning_rate=0.1, max_depth=3)
    model_xgb.fit(X_train, y_train)
    
    if city in cities_with_2_years_data:
        seasonal_periods = min(365, len(df_city) // 2)
        model_hw = ExponentialSmoothing(y_train, trend='add', seasonal='add', seasonal_periods=seasonal_periods).fit()
        models[city] = (model_xgb, model_hw)
    else:
        models[city] = (model_xgb, None)

# Save models to disk
with open("aqi_prediction_models.pkl", "wb") as f:
    pickle.dump(models, f)

def predict_aqi(city, future_date):
    future_date = pd.to_datetime(future_date)
    with open("aqi_prediction_models.pkl", "rb") as f:
        models = pickle.load(f)
    
    if city not in models:
        return {"Error": "City not found in dataset"}
    
    model_xgb, model_hw = models[city]
    future_features = pd.DataFrame({'day_of_year': [future_date.dayofyear], 'year': [future_date.year]})
    xgb_pred = model_xgb.predict(future_features)[0]
    
    if model_hw:
        hw_pred = model_hw.forecast(steps=1).iloc[0]
        final_prediction = 0.5 * xgb_pred + 0.5 * hw_pred
       
    else:
        final_prediction = xgb_pred
    
    return {
        "City": city,
        "Future Date": future_date.strftime('%Y-%m-%d'),
        "Predicted AQI": round(final_prediction, 2),
       
    }

# Example usage
city_input = input("Enter city name: ")
date_input = input("Enter future date (YYYY-MM-DD): ")
prediction = predict_aqi(city_input, date_input)
print(prediction)


{'City': 'Agra', 'Future Date': '2024-09-18', 'Predicted AQI': 101.66}


In [9]:
import pickle
import pandas as pd

def predict_aqi(city, future_date):
    future_date = pd.to_datetime(future_date)
    
    # Load the saved models
    with open("aqi_prediction_models.pkl", "rb") as f:
        models = pickle.load(f)
    
    if city not in models:
        return {"Error": "City not found in dataset"}
    
    model_xgb, model_hw = models[city]
    future_features = pd.DataFrame({'day_of_year': [future_date.dayofyear], 'year': [future_date.year]})
    xgb_pred = model_xgb.predict(future_features)[0]
    
    if model_hw:
        hw_pred = model_hw.forecast(steps=1).iloc[0]
        final_prediction = 0.5 * xgb_pred + 0.5 * hw_pred
    else:
        final_prediction = xgb_pred
    
    return {
        "City": city,
        "Future Date": future_date.strftime('%Y-%m-%d'),
        "Predicted AQI": round(final_prediction, 2)
    }

# Example usage
city_input = input("Enter city name: ")
date_input = input("Enter future date (YYYY-MM-DD): ")
prediction = predict_aqi(city_input, date_input)
print(prediction)


{'City': 'Agra', 'Future Date': '2025-05-20', 'Predicted AQI': 121.44}


In [17]:
import pandas as pd
import numpy as np
import pickle
from statsmodels.tsa.holtwinters import ExponentialSmoothing
from xgboost import XGBRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error
from sklearn.experimental import enable_iterative_imputer  # Enable IterativeImputer
from sklearn.impute import IterativeImputer

# Load dataset
 # Update with your file path

# Convert date column to datetime format
df['date'] = pd.to_datetime(df['date'])

# Remove duplicate rows based on 'City' and 'date'
df = df.drop_duplicates(subset=['City', 'date'])

# Count unique days per city
city_data_counts = df.groupby('City')['date'].nunique()

# Categorize cities
cities_with_2_years_data = city_data_counts[city_data_counts > 730].index.tolist()
cities_with_less_than_2_years_data = city_data_counts[city_data_counts <= 730].index.tolist()

# Dictionary to store trained models
models = {}
errors = {}

for city in df['City'].unique():
    df_city = df[df['City'] == city].copy()
    df_city.set_index('date', inplace=True)
    df_city = df_city[~df_city.index.duplicated(keep='first')]
    df_city = df_city.asfreq('D')
    
    # Fill missing values using linear interpolation
    df_city['Index Value'] = df_city['Index Value'].interpolate(method='linear')
    
    # Use Iterative Imputer for consecutive missing values
    imputer = IterativeImputer(max_iter=10, random_state=42)
    df_city[['Index Value']] = imputer.fit_transform(df_city[['Index Value']])
    
    # Define features and target
    df_city['day_of_year'] = df_city.index.dayofyear
    df_city['year'] = df_city.index.year
    X = df_city[['day_of_year', 'year']]
    y = df_city['Index Value']
    
    # Split data for training/testing
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)
    
    model_xgb = XGBRegressor(objective='reg:squarederror', n_estimators=50, learning_rate=0.1, max_depth=3)
    model_xgb.fit(X_train, y_train)
    
    y_pred = model_xgb.predict(X_test)
    error_xgb = mean_absolute_error(y_test, y_pred)
    
    if city in cities_with_2_years_data:
        seasonal_periods = min(365, len(df_city) // 2)
        model_hw = ExponentialSmoothing(y_train, trend='add', seasonal='add', seasonal_periods=seasonal_periods).fit()
        hw_pred = model_hw.forecast(steps=len(y_test))
        error_hw = mean_absolute_error(y_test, hw_pred)
        models[city] = (model_xgb, model_hw)
        errors[city] = (error_xgb, error_hw)
    else:
        models[city] = (model_xgb, None)
        errors[city] = (error_xgb, None)

# Save models and errors to disk
with open("aqi_prediction_models.pkl", "wb") as f:
    pickle.dump(models, f)
with open("aqi_prediction_errors.pkl", "wb") as f:
    pickle.dump(errors, f)

def predict_aqi(city, future_date):
    future_date = pd.to_datetime(future_date)
    with open("aqi_prediction_models.pkl", "rb") as f:
        models = pickle.load(f)
    with open("aqi_prediction_errors.pkl", "rb") as f:
        errors = pickle.load(f)
    
    if city not in models:
        return {"Error": "City not found in dataset"}
    
    model_xgb, model_hw = models[city]
    error_xgb, error_hw = errors.get(city, (None, None))
    future_features = pd.DataFrame({'day_of_year': [future_date.dayofyear], 'year': [future_date.year]})
    xgb_pred = model_xgb.predict(future_features)[0]
    
    if model_hw:
        hw_pred = model_hw.forecast(steps=1).iloc[0]
        final_prediction = 0.5 * xgb_pred + 0.5 * hw_pred
        confidence = max(0, 100 * np.exp(-((error_xgb + (error_hw or 0)) / 50)))
    else:
        final_prediction = xgb_pred
        confidence = max(0, 100 * np.exp(-error_xgb / 50))

    
    return {
        "City": city,
        "Future Date": future_date.strftime('%Y-%m-%d'),
        "Predicted AQI": round(final_prediction, 2),
        "Prediction Confidence": round(confidence, 2)
    }

# Example usage
city_input = input("Enter city name: ")
date_input = input("Enter future date (YYYY-MM-DD): ")
prediction = predict_aqi(city_input, date_input)
print(prediction)

{'City': 'Greater Noida', 'Future Date': '2024-06-06', 'Predicted AQI': 189.02, 'Prediction Confidence': 0.18}
