In [5]:

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# -------------------------------
# Step 1: Load Cleaned Data
# -------------------------------
# Read the cleaned CSV into a DataFrame.
# NOTE(review): this is a machine-specific absolute Windows path; the cell
# only runs where this exact file exists. Consider a configurable data
# directory if the notebook is shared.
cleaned_file_path = r"C:\Users\vrmhc\Downloads\cleaned_air_quality_dataset.csv"
df = pd.read_csv(cleaned_file_path)

# -------------------------------
# Step 2: Check Columns
# -------------------------------
# Show the available column names so a target can be picked from them.
print("Columns in dataset:", list(df.columns))

# -------------------------------
# Step 3: Choose Target Column
# -------------------------------
# Prompt for the column to predict. Surrounding whitespace is stripped and the
# name is validated immediately, so a typo produces a readable error here
# rather than a bare KeyError when the column is looked up.
# NOTE: input() blocks an unattended "Restart & Run All"; assign `target` a
# literal column name instead when the notebook must run without a user.
target = input("Enter the target column name from the list above: ").strip()

if target not in df.columns:
    raise ValueError(
        f"Unknown target column {target!r}. "
        f"Available columns: {df.columns.tolist()}"
    )

# Linear regression needs a numeric response; reject text/date columns early.
if not pd.api.types.is_numeric_dtype(df[target]):
    raise ValueError(
        f"Target column {target!r} has dtype {df[target].dtype}, "
        "but a numeric column is required for regression."
    )

# -------------------------------
# Step 4: Select Features & Target
# -------------------------------
# Use every numeric column except the target as a predictor. "number" covers
# all numeric dtypes, not just float64/int64.
features = [col for col in df.select_dtypes(include="number").columns if col != target]

if not features:
    raise ValueError("No numeric feature columns are available besides the target.")

# LinearRegression cannot handle missing values, so keep only rows that are
# complete across the chosen features and the target. Selecting both together
# before dropping keeps X and y row-aligned.
model_frame = df[features + [target]].dropna()

if model_frame.empty:
    raise ValueError(
        "No complete rows remain after dropping missing values in the "
        "feature and target columns."
    )

X = model_frame[features]
y = model_frame[target]

# -------------------------------
# Step 5: Split Data
# -------------------------------
# Hold out 20% of the rows for testing; the fixed random_state makes the
# split repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# -------------------------------
# Step 6: Train Model
# -------------------------------
# fit() returns the estimator itself, so construction and fitting are chained.
model = LinearRegression().fit(X_train, y_train)

# -------------------------------
# Step 7: Make Predictions
# -------------------------------
# Predict the target for the held-out rows only.
y_pred = model.predict(X_test)

# -------------------------------
# Step 8: Evaluate Model
# -------------------------------
# Score the held-out predictions against the true test values.
r2 = r2_score(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)

# Emit the report in a single call; sep="\n" puts each entry on its own line.
print(
    "\nModel Evaluation:",
    f"Mean Squared Error (MSE): {mse:.2f}",
    f"R² Score: {r2:.2f}",
    sep="\n",
)


Columns in dataset: ['site_id', 'week', 'year', 'dateoff', 'ca', 'cl', 'hno3', 'hno3_ppb', 'k', 'mg', 'na', 'nh4', 'no3', 'so2', 'so2_ppb', 'so4', 'tno3', 'dateon']


Enter the target column name from the list above:  week



Model Evaluation:
Mean Squared Error (MSE): 58.12
R² Score: -0.00
