In [33]:
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.impute import KNNImputer
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Load the training and testing datasets
train_df = pd.read_csv('train.csv')
test_df = pd.read_csv('test.csv')

train_df.drop(columns='DATE', inplace=True)
test_df.drop(columns='DATE', inplace=True)
train_df.drop(columns='Unnamed: 0', inplace=True)
test_df.drop(columns='INDEX', inplace=True)

# Apply KNN Imputation to handle missing values in the training data
knn_imputer = KNNImputer(n_neighbors=5)
train_df_imputed = pd.DataFrame(knn_imputer.fit_transform(train_df), columns=train_df.columns)

# Define features and target for the training data
features = [
    'LATITUDE_A', 'LONGITUDE_A', 'ELEVATION_A', 'PRCP_A', 'SNWD_A', 'TMAX_A', 'TMIN_A',
    'LATITUDE_B', 'LONGITUDE_B', 'ELEVATION_B', 'PRCP_B', 'SNWD_B', 'TMAX_B', 'TMIN_B',
    'LATITUDE_C', 'LONGITUDE_C', 'ELEVATION_C', 'PRCP_C', 'SNWD_C', 'TMAX_C', 'TMIN_C',
    'LATITUDE', 'LONGITUDE'
]

target = 'TAVG'  # Assuming this is the column name for City D's average temperature

# Ensure the feature names match exactly in both datasets
X_train = train_df_imputed[features]
y_train = train_df_imputed[target]

# Train the RandomForestRegressor model
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)
X_test = test_df[features]
# Make predictions on the testing data
y_pred = model.predict(X_test)

# If the true target values for the test set are available, evaluate the model
# Uncomment the following lines if you have the true values for the test set
# y_test = test_df_imputed[target]
# mae = mean_absolute_error(y_test, y_pred)
# mse = mean_squared_error(y_test, y_pred)
# rmse = mse ** 0.5
# r2 = r2_score(y_test, y_pred)

# print(f"Mean Absolute Error: {mae}")
# print(f"Mean Squared Error: {mse}")
# print(f"Root Mean Squared Error: {rmse}")
# print(f"R^2 Score: {r2}")

# Print or save predictions
print(y_pred)

# Optionally, save predictions to a CSV file
# predictions_df = pd.DataFrame({'Predicted_TAVG_D': y_pred})
# predictions_df.to_csv('predictions.csv', index=False)


[  0.396  11.403  11.156   9.209  18.671  -0.561  10.445   7.976  11.228
  21.247   5.295   6.147  10.94   25.154  19.765  -1.703  21.025  26.584
  -0.982   2.303  25.173  -3.47    2.854   3.093  19.421   1.695   4.508
  24.38   10.7     5.492  22.017  11.203  20.85   24.125   2.136  19.501
  12.311  22.021  -5.435  -0.283  11.874  -7.851  -7.758  11.238  14.828
  25.592  19.265  -6.057 -16.344  -3.368  -0.255  11.726  22.856  -2.352
   4.816  16.678   2.421   1.284  21.471 -13.299   4.674  15.869   0.955
  22.023  14.122  -1.027  10.993   2.434   8.004  11.639  15.316   1.699
   9.573  19.657  11.6    21.041  11.95    3.462   4.919  -5.664   7.787
  10.368  19.77   10.538  10.725   0.745   9.599   3.039   8.529  -7.939
  10.379  -3.817  12.367  14.926  11.987   2.721 -12.266  23.377  12.244
   5.448  -1.278  -5.387  19.201  -4.061  -4.437  11.096   0.076   1.337
  11.069  19.895  -2.002  16.963  10.583  24.034  23.403   2.223  18.466
   1.688  10.423  -6.751  -1.427  15.355 -11.744  2

In [34]:
df = pd.DataFrame({'TAVG':y_pred})

In [35]:
df.head()

Unnamed: 0,TAVG
0,0.396
1,11.403
2,11.156
3,9.209
4,18.671


In [36]:
df.to_csv('Braves submission 2.csv')

In [37]:
import joblib

# Save the model
joblib.dump(model, 'temperature_predictor_model.pkl')


['temperature_predictor_model.pkl']