In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error

# Load dataset
# NOTE: the path is hard-coded; pd.read_csv raises FileNotFoundError when the
# file is not present, so point DATA_PATH at the real CSV before running.
DATA_PATH = "/mnt/data/air quality data.csv"
TARGET_COLUMN = 'AirQualityIndex'  # Replace with actual target column name
data = pd.read_csv(DATA_PATH)

# Display dataset information
print("Dataset Overview:")
print(data.head())
# DataFrame.info() prints its report itself and returns None, so it is called
# directly; wrapping it in print() would add a stray "None" line to the output.
data.info()

# Ensure target column exists (checked before any cleaning work is done)
if TARGET_COLUMN not in data.columns:
    raise ValueError(f"The dataset does not contain a column named '{TARGET_COLUMN}'. Please check column names.")

# Only numeric columns are used as features; the target is excluded from them
feature_columns = (
    data.drop(columns=[TARGET_COLUMN])
    .select_dtypes(include=[np.number])
    .columns
)
if feature_columns.empty:
    raise ValueError("The dataset has no numeric feature columns to train on.")

# Handling missing values: only rows with gaps in the columns that are actually
# used (numeric features + target) are dropped, so a missing value in an
# ignored non-numeric column no longer throws away an otherwise usable row.
data = data.dropna(subset=[*feature_columns, TARGET_COLUMN])
if data.empty:
    raise ValueError("No rows remain after dropping rows with missing values.")

# Splitting features and target variable
X = data[feature_columns]
y = data[TARGET_COLUMN]

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Feature scaling: statistics are learned from the training split only and
# then applied unchanged to both splits
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Fit both regressors on the scaled training data
lr_model = LinearRegression().fit(X_train, y_train)
rf_model = RandomForestRegressor(n_estimators=100, random_state=42).fit(X_train, y_train)

# Predictions on the held-out test split
y_pred_lr = lr_model.predict(X_test)
y_pred_rf = rf_model.predict(X_test)

# Model Evaluation
def evaluate_model(y_test, y_pred, model_name):
    """Print MAE, MSE and RMSE for one model's predictions.

    Args:
        y_test: True target values.
        y_pred: Predicted target values, compared against ``y_test``.
        model_name: Label used in the heading of the printed report.
    """
    abs_error = mean_absolute_error(y_test, y_pred)
    sq_error = mean_squared_error(y_test, y_pred)
    # RMSE is derived from the MSE rather than computed separately
    root_sq_error = np.sqrt(sq_error)

    report_lines = [
        f"{model_name} Performance:",
        f"Mean Absolute Error: {abs_error}",
        f"Mean Squared Error: {sq_error}",
        f"Root Mean Squared Error: {root_sq_error}",
        "-------------------------",
    ]
    print("\n".join(report_lines))

# Report the metrics of each model against the same held-out targets
for model_label, predictions in (
    ("Linear Regression", y_pred_lr),
    ("Random Forest", y_pred_rf),
):
    evaluate_model(y_test, predictions, model_label)

FileNotFoundError: [Errno 2] No such file or directory: '/mnt/data/air quality data.csv'