In [14]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, KFold, cross_val_score
from sklearn.metrics import mean_squared_error
from sklearn.linear_model import LinearRegression, Lasso, Ridge
from sklearn.svm import SVR
from sklearn.ensemble import RandomForestRegressor
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, GRU, Dense, Conv1D, MaxPooling1D, Flatten
from tensorflow.keras.optimizers import Adam
import matplotlib.pyplot as plt

# Load data
data = pd.read_csv("train.csv")

# Prepare data
X = data.drop(columns=['date_time', 'maxtempC'])
y = data['maxtempC']

# Split data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define a function for manual cross-validation and RMSE calculation for Keras models
def manual_cross_val_rmse(model, X, y, cv):
    kf = KFold(n_splits=cv, shuffle=True, random_state=42)
    rmse_scores = []
    for train_index, val_index in kf.split(X):
        X_train_fold, X_val_fold = X[train_index], X[val_index]
        y_train_fold, y_val_fold = y[train_index], y[val_index]
        model.fit(X_train_fold, y_train_fold, epochs=150, batch_size=32, verbose=0)
        y_pred = model.predict(X_val_fold)
        rmse_scores.append(np.sqrt(mean_squared_error(y_val_fold, y_pred)))
    return np.mean(rmse_scores)

# Initialize lists to store model names and RMSE scores
models = []
rmses = []

# Linear Regression
lr = LinearRegression()
lr.fit(X_train, y_train)
lr_rmse = np.sqrt(mean_squared_error(y_test, lr.predict(X_test)))
lr_cv_rmse = np.mean(cross_val_score(lr, X, y, cv=5, scoring="neg_mean_squared_error"))
models.append('Linear Regression')
rmses.append(lr_rmse)

# Lasso Regression
lasso = Lasso()
lasso.fit(X_train, y_train)
lasso_rmse = np.sqrt(mean_squared_error(y_test, lasso.predict(X_test)))
lasso_cv_rmse = np.mean(cross_val_score(lasso, X, y, cv=5, scoring="neg_mean_squared_error"))
models.append('Lasso')
rmses.append(lasso_rmse)

# Ridge Regression
ridge = Ridge()
ridge.fit(X_train, y_train)
ridge_rmse = np.sqrt(mean_squared_error(y_test, ridge.predict(X_test)))
ridge_cv_rmse = np.mean(cross_val_score(ridge, X, y, cv=5, scoring="neg_mean_squared_error"))
models.append('Ridge')
rmses.append(ridge_rmse)

# Support Vector Machine
svm = SVR()
svm.fit(X_train, y_train)
svm_rmse = np.sqrt(mean_squared_error(y_test, svm.predict(X_test)))
svm_cv_rmse = np.mean(cross_val_score(svm, X, y, cv=5, scoring="neg_mean_squared_error"))
models.append('SVM')
rmses.append(svm_rmse)

# Random Forest
rf = RandomForestRegressor()
rf.fit(X_train, y_train)
rf_rmse = np.sqrt(mean_squared_error(y_test, rf.predict(X_test)))
rf_cv_rmse = np.mean(cross_val_score(rf, X, y, cv=5, scoring="neg_mean_squared_error"))
models.append('Random Forest')
rmses.append(rf_rmse)

# LSTM Model
X_lstm = X.values.reshape((X.shape[0], 1, X.shape[1]))
lstm_model = Sequential()
lstm_model.add(LSTM(50, activation='relu', input_shape=(1, X.shape[1])))
lstm_model.add(Dense(1))
lstm_model.compile(optimizer='adam', loss='mse')

lstm_rmse = np.sqrt(mean_squared_error(y_test, lstm_model.predict(X_test.reshape((X_test.shape[0], 1, X_test.shape[1])))))
lstm_cv_rmse = manual_cross_val_rmse(lstm_model, X_lstm, y.values, cv=5)
models.append('LSTM')
rmses.append(lstm_rmse)

# GRU Model
gru_model = Sequential()
gru_model.add(GRU(50, activation='relu', input_shape=(1, X.shape[1])))
gru_model.add(Dense(1))
gru_model.compile(optimizer='adam', loss='mse')

gru_rmse = np.sqrt(mean_squared_error(y_test, gru_model.predict(X_test.reshape((X_test.shape[0], 1, X_test.shape[1])))))
gru_cv_rmse = manual_cross_val_rmse(gru_model, X_lstm, y.values, cv=5)
models.append('GRU')
rmses.append(gru_rmse)

# CNN Model
X_cnn = X.values.reshape((X.shape[0], X.shape[1], 1))
cnn_model = Sequential()
cnn_model.add(Conv1D(filters=64, kernel_size=2, activation='relu', input_shape=(X.shape[1], 1)))
cnn_model.add(MaxPooling1D(pool_size=2))
cnn_model.add(Flatten())
cnn_model.add(Dense(50, activation='relu'))
cnn_model.add(Dense(1))
cnn_model.compile(optimizer='adam', loss='mse')

cnn_rmse = np.sqrt(mean_squared_error(y_test, cnn_model.predict(X_test.reshape((X_test.shape[0], X_test.shape[1], 1)))))
cnn_cv_rmse = manual_cross_val_rmse(cnn_model, X_cnn, y.values, cv=5)
models.append('CNN')
rmses.append(cnn_rmse)

# Create a DataFrame from the results
results = pd.DataFrame({'Model': models, 'RMSE': rmses, 'Cross-Validation RMSE': [lr_cv_rmse, lasso_cv_rmse, ridge_cv_rmse, svm_cv_rmse, rf_cv_rmse, lstm_cv_rmse, gru_cv_rmse, cnn_cv_rmse]})

# Display the DataFrame
print(results)

# RMSE Comparison
plt.figure(figsize=(10, 6))
plt.bar(models, rmses, color='skyblue', label='Test RMSE')
plt.bar(models, results['Cross-Validation RMSE'], color='orange', label='Cross-Validation RMSE')
plt.xlabel('Models')
plt.ylabel('RMSE')
plt.title('Model Comparison')
plt.xticks(rotation=45)
plt.legend()
plt.show()


  super().__init__(**kwargs)


AttributeError: 'DataFrame' object has no attribute 'reshape'

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, KFold, cross_val_score
from sklearn.metrics import mean_squared_error
from sklearn.linear_model import LinearRegression, Lasso, Ridge
from sklearn.svm import SVR
from sklearn.ensemble import RandomForestRegressor
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, GRU, Dense, Conv1D, MaxPooling1D, Flatten
from tensorflow.keras.optimizers import Adam
import matplotlib.pyplot as plt

# Load data
data = pd.read_csv("train.csv")

# Prepare data
X = data.drop(columns=['date_time', 'maxtempC'])
y = data['maxtempC']

# Split data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define a function for manual cross-validation and RMSE calculation for Keras models
def manual_cross_val_rmse(model, X, y, cv):
  kf = KFold(n_splits=cv, shuffle=True, random_state=42)
  rmse_scores = []
  for train_index, val_index in kf.split(X):
    X_train_fold, X_val_fold = X[train_index], X[val_index]
    y_train_fold, y_val_fold = y[train_index], y[val_index]
    model.fit(X_train_fold, y_train_fold, epochs=150, batch_size=32, verbose=0)
    y_pred = model.predict(X_val_fold)
    rmse_scores.append(np.sqrt(mean_squared_error(y_val_fold, y_pred)))
  return np.mean(rmse_scores)

# Initialize lists to store model names and RMSE scores
models = []
rmses = []

# Linear Regression
lr = LinearRegression()
lr.fit(X_train, y_train)
lr_rmse = np.sqrt(mean_squared_error(y_test, lr.predict(X_test)))
lr_cv_rmse = np.mean(cross_val_score(lr, X, y, cv=5, scoring="neg_mean_squared_error"))
models.append('Linear Regression')
rmses.append(lr_rmse)

# Lasso Regression
lasso = Lasso()
lasso.fit(X_train, y_train)
lasso_rmse = np.sqrt(mean_squared_error(y_test, lasso.predict(X_test)))
lasso_cv_rmse = np.mean(cross_val_score(lasso, X, y, cv=5, scoring="neg_mean_squared_error"))
models.append('Lasso')
rmses.append(lasso_rmse)

# Ridge Regression
ridge = Ridge()
ridge.fit(X_train, y_train)
ridge_rmse = np.sqrt(mean_squared_error(y_test, ridge.predict(X_test)))
ridge_cv_rmse = np.mean(cross_val_score(ridge, X, y, cv=5, scoring="neg_mean_squared_error"))
models.append('Ridge')
rmses.append(ridge_rmse)

# Support Vector Machine
svm = SVR()
svm.fit(X_train, y_train)
svm_rmse = np.sqrt(mean_squared_error(y_test, svm.predict(X_test)))
svm_cv_rmse = np.mean(cross_val_score(svm, X, y, cv=5, scoring="neg_mean_squared_error"))
models.append('SVM')
rmses.append(svm_rmse)

# Random Forest
rf = RandomForestRegressor()
rf.fit(X_train, y_train)
rf_rmse = np.sqrt(mean_squared_error(y_test, rf.predict(X_test)))
rf_cv_rmse = np.mean(cross_val_score(rf, X, y, cv=5, scoring="neg_mean_squared_error"))
models.append('Random Forest')
rmses.append(rf_rmse)

# LSTM Model
X_lstm = X_train.values.reshape((X_train.shape[0], 1, X_train.shape[1]))  # Reshape training data
lstm_model = Sequential()
lstm_model.add(LSTM(50, activation='relu', input_shape=(1, X.shape[1])))  # Use X.shape[1] here
lstm_model.add(Dense(1))
lstm_model.compile(optimizer='adam', loss='mse')
lstm_model.fit(X_lstm, y_train, epochs=150, batch_size=32, verbose=0)  # Train with reshaped data

lstm_rmse = np.sqrt(mean_squared_error(y_test, lstm_model.predict(X_test.values.reshape((X_test.shape[0], 1, X_test.shape[1])))))
lstm_cv_rmse = manual_cross_val_rmse(lstm_model, X_lstm, y.values, cv=5)  # Use reshaped data for cross-validation
models.append('LSTM')
rmses.append(lstm_rmse)

# GRU Model (similar to LSTM)
X_gru = X_train.values.reshape((X_train.shape[0], 1, X_train.shape[1]))
gru_model = Sequential()
gru_model.add(GRU(50, activation='relu', input_shape=(1, X.shape[1])))
gru_model.add(Dense(1))
gru_model.compile(optimizer='adam', loss='mse')
gru_model.fit(X_gru, y_train, epochs=150, batch_size=32, verbose=0)

gru_rmse = np.sqrt(mean_squared_error(y_test, gru_model.predict(X_test.values.reshape((X_test.shape[0], 1, X_test.shape[1])))))
gru_cv_rmse = manual_cross_val_rmse(gru_model, X_gru, y.values, cv=5)


models.append('GRU')
rmses.append(gru_rmse)

# CNN Model
X_cnn = X_train.values.reshape((X_train.shape[0], X_train.shape[1], 1))  # Reshape training data
cnn_model = Sequential()
cnn_model.add(Conv1D(filters=64, kernel_size=2, activation='relu', input_shape=(X.shape[1], 1)))
cnn_model.add(MaxPooling1D(pool_size=2))
cnn_model.add(Flatten())
cnn_model.add(Dense(50, activation='relu'))
cnn_model.add(Dense(1))
cnn_model.compile(optimizer='adam', loss='mse')
cnn_model.fit(X_cnn, y_train, epochs=150, batch_size=32, verbose=0)

cnn_rmse = np.sqrt(mean_squared_error(y_test, cnn_model.predict(X_test.values.reshape((X_test.shape[0], X_test.shape[1], 1)))))
cnn_cv_rmse = manual_cross_val_rmse(cnn_model, X_cnn, y.values, cv=5)
models.append('CNN')
rmses.append(cnn_rmse)

# Create a DataFrame from the results
results = pd.DataFrame({'Model': models, 'RMSE': rmses, 'Cross-Validation RMSE': [lr_cv_rmse, lasso_cv_rmse, ridge_cv_rmse, svm_cv_rmse, rf_cv_rmse, lstm_cv_rmse, gru_cv_rmse, cnn_cv_rmse]})

# Display the DataFrame
print(results)

# RMSE Comparison
plt.figure(figsize=(10, 6))
plt.bar(models, rmses, color='skyblue', label='Test RMSE')
plt.bar(models, results['Cross-Validation RMSE'], color='orange', label='Cross-Validation RMSE')
plt.xlabel('Models')
plt.ylabel('RMSE')
plt.title('Model Comparison')
plt.xticks(rotation=45)
plt.legend()
plt.show()