In [None]:
import pandas as pd
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.impute import KNNImputer
from sklearn.compose import ColumnTransformer
from sklearn.svm import SVR
from sklearn.pipeline import Pipeline, make_pipeline
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

In [None]:
# Directory of the competition dataset as mounted by Kaggle. Kept in one place
# so the notebook can be pointed at another copy of the data with a single edit.
DATA_DIR = '/kaggle/input/zindi-african-air-quality-prediction-challenge'

# Train.csv is later read for the `pm2_5` target; Test.csv is only predicted on.
train_data = pd.read_csv(f'{DATA_DIR}/Train.csv')
test_data = pd.read_csv(f'{DATA_DIR}/Test.csv')

In [None]:
def add_date_features(df):
    """Add calendar columns derived from ``df['date']``, modifying ``df`` in place.

    The ``date`` column is converted with ``pd.to_datetime`` and written back.
    Four columns are then created (or overwritten): ``year``, ``month``,
    ``day`` and ``weekday``. ``weekday`` is pandas' ``dt.weekday`` value
    (Monday = 0 ... Sunday = 6).

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a ``date`` column that ``pd.to_datetime`` can parse.

    Returns
    -------
    None
        The frame is mutated; nothing is returned.
    """
    parsed = pd.to_datetime(df['date'])
    df['date'] = parsed
    # Each name is both the new column and the `.dt` attribute it is read from.
    for part in ('year', 'month', 'day', 'weekday'):
        df[part] = getattr(parsed.dt, part)

# Both splits get the same calendar columns; the helper edits each frame in
# place, so nothing needs to be reassigned here.
for data in (train_data, test_data):
    add_date_features(data)

In [None]:
# Column groups handed to the preprocessing step.
categorical_features = ['city', 'country']
numerical_features = ['year', 'month', 'day', 'weekday']

# Identifier columns and the raw timestamp are removed from the feature frames.
drop_columns = ['id', 'site_id', 'date']

# Training split: `pm2_5` is the target, so it is removed from the features too.
y_train = train_data['pm2_5']
X_train = train_data.drop(columns=[*drop_columns, 'pm2_5'])

# Test split: the ids are kept aside so they can be attached to the predictions.
ids_test = test_data['id']
X_test = test_data.drop(columns=drop_columns)

In [None]:
# Categorical columns: one-hot encode. With handle_unknown='ignore', a category
# not seen during fit is encoded as all zeros instead of raising an error.
categorical_pipeline = make_pipeline(OneHotEncoder(handle_unknown='ignore'))

# Numeric columns: fill missing values from the 5 nearest rows, then standardise.
numeric_pipeline = make_pipeline(KNNImputer(n_neighbors=5), StandardScaler())

In [None]:
# Route each feature group through its own pipeline. Output columns follow the
# order of this list ('cat' block first, then 'num'). No `remainder` is given,
# so scikit-learn's default ('drop') discards every column not listed here.
preprocessor = ColumnTransformer(transformers=[
    ('cat', categorical_pipeline, categorical_features),
    ('num', numeric_pipeline, numerical_features),
])

In [None]:
# RBF-kernel support vector regressor with hard-coded C and gamma.
# NOTE(review): these values look like the result of a hyper-parameter
# search that is not part of this notebook — confirm their origin.
svr_model = SVR(
    kernel='rbf',
    C=359.9463714999908,
    gamma=0.47732892632361296,
)

# Preprocessing and model are chained into one estimator, so the transformers
# are fitted on the training data and re-applied unchanged at predict time.
svr_pipeline = Pipeline([
    ('preprocessor', preprocessor),
    ('svr', svr_model),
])

# Model training
svr_pipeline.fit(X_train, y_train)

In [None]:
# In-sample check: the model is scored on the same rows it was fitted on, so
# this RMSE is optimistic and is not an estimate of generalisation error.
y_pred = svr_pipeline.predict(X_train)
# `squared=False` was deprecated in scikit-learn 1.4 and removed in 1.6, where
# it raises TypeError. Taking the square root of the MSE gives the same RMSE
# on every scikit-learn version and needs no extra import.
rmse = mean_squared_error(y_train, y_pred) ** 0.5
print("Training RMSE:", rmse)

# Predictions on test data
final_predictions = svr_pipeline.predict(X_test)
# Pair every prediction with the id of the test row it was made for.
predictions_df = pd.DataFrame({
    'id': ids_test,
    'pm2_5': final_predictions
})

In [None]:
# Write the predictions to Kaggle's working directory; index=False keeps the
# DataFrame index out of the file.
predictions_df.to_csv(
    '/kaggle/working/test_predictions_svr14.csv',
    index=False,
)