<a href="https://colab.research.google.com/github/Panda1304/Emotion_Embedding_Ratings/blob/main/Ratings_2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.base import BaseEstimator, RegressorMixin
from sklearn.decomposition import PCA
from sklearn.exceptions import NotFittedError
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.model_selection import RandomizedSearchCV
from sklearn.neighbors import KNeighborsRegressor
from sklearn.preprocessing import StandardScaler
from tensorflow.keras import layers, models
from tensorflow.keras.optimizers import Adam

# Read the labelled training table and the unlabelled test table.
train_data = pd.read_csv('/content/train.csv')
test_data = pd.read_csv('/content/test.csv')

# Target first, then the feature matrices: every column except the
# identifier (and, for the training table, the target itself).
y_train = train_data['score'].values
X_train = train_data.drop(columns=['ID', 'score']).values
X_test = test_data.drop(columns=['ID']).values

# Standardise using statistics learned from the training rows only, then
# apply that same transform to both matrices.
scaler = StandardScaler().fit(X_train)
X_train_normalized = scaler.transform(X_train)
X_test_normalized = scaler.transform(X_test)

def create_cnn_model(filters=128, kernel_size=5, dropout_rate=0.5, learning_rate=0.001, input_dim=64):
    """Build and compile a 1-D CNN that regresses one value from a flat vector.

    The input vector is reshaped to ``(input_dim, 1)`` and passed through
    three Conv1D -> BatchNormalization -> MaxPooling1D(2) stages whose filter
    counts are ``filters``, ``2 * filters`` and ``4 * filters``. The result is
    flattened, regularised with dropout, and fed to Dense(128) followed by a
    linear Dense(1) output.

    Args:
        filters: Number of filters in the first convolution stage.
        kernel_size: Kernel width used by every convolution.
        dropout_rate: Dropout rate applied after flattening.
        learning_rate: Learning rate passed to the Adam optimizer.
        input_dim: Length of each input vector. Defaults to 64, the value
            that was previously hard-coded. Each of the three pooling layers
            halves the sequence length, so this should be at least 8.

    Returns:
        A compiled Keras ``Model`` using mean-squared-error loss.
    """
    cnn_input = layers.Input(shape=(input_dim,))
    # Conv1D expects a (steps, channels) layout; use a single channel.
    x = layers.Reshape((input_dim, 1))(cnn_input)

    # Three identical stages that differ only in their filter count.
    for width_multiplier in (1, 2, 4):
        x = layers.Conv1D(filters * width_multiplier, kernel_size,
                          activation='relu', padding='same')(x)
        x = layers.BatchNormalization()(x)
        x = layers.MaxPooling1D(2)(x)

    x = layers.Flatten()(x)
    x = layers.Dropout(dropout_rate)(x)
    x = layers.Dense(128, activation='relu')(x)
    x = layers.Dense(1, activation='linear')(x)

    model = models.Model(inputs=cnn_input, outputs=x)
    model.compile(optimizer=Adam(learning_rate=learning_rate), loss='mse')
    return model

class KerasRegressorCustom(BaseEstimator, RegressorMixin):
    """Scikit-learn style wrapper around the network built by ``create_cnn_model``.

    Every constructor argument is stored unchanged under an attribute of the
    same name, which is what lets scikit-learn's search utilities read, clone
    and override the hyperparameters.

    Args:
        filters: Forwarded to ``create_cnn_model``.
        kernel_size: Forwarded to ``create_cnn_model``.
        dropout_rate: Forwarded to ``create_cnn_model``.
        learning_rate: Forwarded to ``create_cnn_model``.
        epochs: Number of training epochs used by ``fit``.
        batch_size: Mini-batch size used by ``fit``.
    """

    def __init__(self, filters=128, kernel_size=5, dropout_rate=0.5, learning_rate=0.001, epochs=30, batch_size=32):
        self.filters = filters
        self.kernel_size = kernel_size
        self.dropout_rate = dropout_rate
        self.learning_rate = learning_rate
        self.epochs = epochs
        self.batch_size = batch_size
        # Holds the trained Keras model; stays None until fit() is called.
        self.model = None

    def fit(self, X, y):
        """Build a fresh network from the current hyperparameters and train it.

        Any previously trained model is replaced.

        Returns:
            self, so calls can be chained.
        """
        self.model = create_cnn_model(filters=self.filters, kernel_size=self.kernel_size,
                                      dropout_rate=self.dropout_rate, learning_rate=self.learning_rate)
        self.model.fit(X, y, epochs=self.epochs, batch_size=self.batch_size, verbose=0)
        return self

    def predict(self, X):
        """Return the trained network's output for ``X``.

        The value is passed through exactly as Keras returns it; because the
        network ends in a single-unit Dense layer this is a 2-D array with
        one column, not a flat vector.

        Raises:
            NotFittedError: If called before ``fit``. This exception type
                subclasses both ``ValueError`` and ``AttributeError``.
        """
        if self.model is None:
            raise NotFittedError(
                "This KerasRegressorCustom instance is not fitted yet. "
                "Call 'fit' before using 'predict'."
            )
        return self.model.predict(X)

cnn_model = KerasRegressorCustom()

# Candidate values for each CNN hyperparameter.
cnn_param_dist = {
    'filters': [64, 128, 256],
    'kernel_size': [3, 5, 7],
    'dropout_rate': [0.2, 0.3, 0.5],
    'learning_rate': [0.0001, 0.001, 0.01]
}

# The full grid holds 3**4 = 81 configurations, and each one needs a complete
# training run per CV fold, so an exhaustive search is impractical here.
# Sample a fixed number of configurations instead; random_state makes the
# sampled subset repeatable.
random_search_cnn = RandomizedSearchCV(
    cnn_model,
    cnn_param_dist,
    n_iter=10,
    cv=3,
    scoring='neg_mean_squared_error',
    n_jobs=-1,
    random_state=42,
)
random_search_cnn.fit(X_train_normalized, y_train)

print(f"Best CNN Parameters: {random_search_cnn.best_params_}")

best_cnn_model = random_search_cnn.best_estimator_

# best_cnn_model.predict() returns the network's final single-unit output,
# i.e. one number per row, which leaves nothing for PCA or KNN to work with.
# Take the activations of the layer just before the output (the Dense(128)
# layer) so the downstream steps receive a real feature vector.
feature_extractor = models.Model(
    inputs=best_cnn_model.model.input,
    outputs=best_cnn_model.model.layers[-2].output,
)
train_cnn_features = feature_extractor.predict(X_train_normalized)
test_cnn_features = feature_extractor.predict(X_test_normalized)

# Keep at most 50 principal components (fewer if the feature width is smaller).
n_components = min(50, train_cnn_features.shape[1])
pca = PCA(n_components=n_components)
train_cnn_features_pca = pca.fit_transform(train_cnn_features)
test_cnn_features_pca = pca.transform(test_cnn_features)

# Hyperparameters that change KNN predictions. leaf_size is deliberately left
# at its default: it only tunes the speed/memory of the neighbour-search tree,
# so searching over it multiplied the number of candidates by 40 for no gain.
knn_param_dist = {
    'n_neighbors': np.arange(1, 21),
    'p': [1, 2],
    'weights': ['uniform', 'distance'],
}

knn_model = KNeighborsRegressor()

random_search_knn = GridSearchCV(knn_model, knn_param_dist, cv=3, scoring='neg_mean_squared_error', n_jobs=-1)
random_search_knn.fit(train_cnn_features_pca, y_train)

print(f"Best KNN Parameters: {random_search_knn.best_params_}")

# With the default refit=True the search has already retrained the best
# configuration on the full training set, so no extra fit() call is needed.
best_knn_model = random_search_knn.best_estimator_

predictions = best_knn_model.predict(test_cnn_features_pca)

# Pair every test ID with its predicted score and write the submission file.
submission = pd.DataFrame({
    'ID': test_data['ID'],
    'score': predictions
})

submission.to_csv('submission.csv', index=False)

# The search was scored with neg_mean_squared_error, so negate best_score_ to
# get the mean cross-validated MSE of the selected KNN configuration.
# NOTE(review): the inputs to this search come from a CNN trained on all
# training rows, so even this estimate is probably optimistic -- confirm.
cv_mse = -random_search_knn.best_score_
print(f"Cross-validated MSE: {cv_mse:.4f}")

# Scoring on the rows the model was fitted on is not a generalisation
# estimate (a distance-weighted KNN reproduces its own training targets), so
# label it for what it is.
y_pred_train = best_knn_model.predict(train_cnn_features_pca)
mse = mean_squared_error(y_train, y_pred_train)
print(f"Training MSE (optimistic): {mse:.4f}")

print("Submission file created: 'submission.csv'")


KeyboardInterrupt: 

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsRegressor
from sklearn.impute import SimpleImputer
from sklearn.metrics import mean_squared_error
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.callbacks import EarlyStopping
import joblib

In [None]:
# Switch TensorFlow to eager execution, but only when it reports that eager
# mode is not already active.
if not tf.executing_eagerly():
    tf.compat.v1.enable_eager_execution()


In [None]:
# Read the labelled training CSV into a DataFrame.
train_data = pd.read_csv('/content/train.csv')

In [None]:
# Target vector: the 'score' column.
y = train_data['score'].values
# Feature matrix: every column except the identifier and the target.
X = train_data.drop(columns=['ID', 'score']).values

In [None]:
# Hold out 20% of the rows for validation; the fixed random_state makes the
# split repeatable.
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=22)

In [None]:
# Learn per-column means from the training split only, then fill missing
# values in both splits with those means.
imputer = SimpleImputer(strategy='mean').fit(X_train)
X_train_imputed = imputer.transform(X_train)
X_val_imputed = imputer.transform(X_val)

In [None]:
# Standardise with statistics taken from the (imputed) training split, and
# reuse that same transform for the validation split.
scaler = StandardScaler().fit(X_train_imputed)
X_train_normalized = scaler.transform(X_train_imputed)
X_val_normalized = scaler.transform(X_val_imputed)

In [None]:
# Functional-API graph: a flat 64-value vector is treated as a 64-step,
# single-channel sequence.
cnn_input = layers.Input(shape=(64,))  # 64-dimensional embeddings
x = layers.Reshape((64, 1))(cnn_input)

# Two convolution + pooling stages; only the filter count differs.
for conv_filters in (128, 256):
    x = layers.Conv1D(conv_filters, 3, activation='relu')(x)
    x = layers.MaxPooling1D(2)(x)

# Regression head: flatten, small dense layer, dropout, one linear output.
x = layers.Flatten()(x)
x = layers.Dense(32, activation='relu')(x)
x = layers.Dropout(0.4)(x)
x = layers.Dense(1, activation='linear')(x)


In [None]:
# Wrap the graph (input tensor -> final output tensor) in a Model and set it
# up for MSE regression with the Adam optimizer.
cnn_model = models.Model(cnn_input, x)
cnn_model.compile(loss='mean_squared_error', optimizer='adam')

In [None]:
# Watch validation loss; stop once it has gone 10 epochs without improving
# and restore the weights from the best epoch.
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

In [None]:
# Train for at most 100 epochs, evaluating on the held-out split after each
# one so the early-stopping callback can end the run sooner.
cnn_model.fit(
    X_train_normalized,
    y_train,
    validation_data=(X_val_normalized, y_val),
    epochs=100,
    batch_size=32,
    verbose=1,
    callbacks=[early_stopping],
)

Epoch 1/100
[1m788/788[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 23ms/step - loss: 1.9179 - val_loss: 0.7862
Epoch 2/100
[1m788/788[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 22ms/step - loss: 1.1551 - val_loss: 0.7604
Epoch 3/100
[1m788/788[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 22ms/step - loss: 1.0497 - val_loss: 0.7443
Epoch 4/100
[1m788/788[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 23ms/step - loss: 0.9632 - val_loss: 0.7889
Epoch 5/100
[1m788/788[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 22ms/step - loss: 0.8998 - val_loss: 0.7301
Epoch 6/100
[1m788/788[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 23ms/step - loss: 0.8493 - val_loss: 0.7245
Epoch 7/100
[1m788/788[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 22ms/step - loss: 0.8111 - val_loss: 0.7570
Epoch 8/100
[1m788/788[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 23ms/step - loss: 0.7913 - val_loss: 0.7287
Epoch 9/100
[1m

<keras.src.callbacks.history.History at 0x78376519ef50>

In [None]:
# Run the trained CNN over both splits. The network ends in a single-unit
# Dense layer, so each "features" array holds the model's one predicted value
# per row rather than an intermediate embedding.
# NOTE(review): if the KNN stage was meant to consume learned embeddings, the
# output of an earlier layer would be needed here -- confirm intent.
train_cnn_features = cnn_model.predict(X_train_normalized)
val_cnn_features = cnn_model.predict(X_val_normalized)

[1m788/788[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 6ms/step
[1m197/197[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 8ms/step


In [None]:
# Distance-weighted KNN over the CNN outputs. The neighbour count of 25190 is
# sized for this particular training split; scikit-learn raises when asked
# for more neighbours than there are fitted samples, so cap it at the number
# of training rows to keep the cell working on a smaller split.
knn = KNeighborsRegressor(
    n_neighbors=min(25190, len(train_cnn_features)),
    p=4,
    weights='distance',
)
knn.fit(train_cnn_features, y_train)

In [None]:
# Score the CNN -> KNN pipeline on the held-out validation split.
val_predictions = knn.predict(val_cnn_features)
mse = mean_squared_error(y_true=y_val, y_pred=val_predictions)
print(f"Mean Squared Error on Validation Set: {mse}")

Mean Squared Error on Validation Set: 0.7667840550711658


In [None]:
# Persist every fitted piece of the pipeline (network, KNN regressor, scaler,
# imputer) so inference can be run without retraining. All paths are
# relative, so the files are written to the current working directory.
cnn_model.save('cnn_model.h5')
joblib.dump(knn, 'knn_model.pkl')
joblib.dump(scaler, 'scaler.pkl')
joblib.dump(imputer, 'imputer.pkl')



['imputer.pkl']

In [None]:
# Read the unlabelled test CSV into a DataFrame.
test_data = pd.read_csv('/content/test_1.csv')

In [None]:
# Every column except the identifier is used as a model input.
X_test = test_data.drop(columns=['ID']).values

In [None]:
# Reload the fitted imputer from the same relative path it was saved to, so
# the round trip does not rely on the working directory being /content, then
# fill missing test values with the training-split means.
imputer = joblib.load('imputer.pkl')
X_test_imputed = imputer.transform(X_test)

In [None]:
# Reload the fitted scaler from the same relative path it was saved to, so
# the round trip does not rely on the working directory being /content, then
# standardise the test features with the training-split statistics.
scaler = joblib.load('scaler.pkl')
X_test_normalized = scaler.transform(X_test_imputed)

In [None]:
# Reload the trained network from the same relative path it was saved to, so
# the round trip does not rely on the working directory being /content.
cnn_model = tf.keras.models.load_model('cnn_model.h5')



In [None]:
# Run the reloaded CNN over the preprocessed test rows; the result is the
# input handed to the KNN stage.
test_cnn_features = cnn_model.predict(X_test_normalized)

[1m110/110[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step


In [None]:
# Reload the fitted KNN regressor from disk.
knn = joblib.load('knn_model.pkl')

In [None]:
# Final score predictions: KNN applied to the CNN outputs for the test rows.
test_predictions = knn.predict(test_cnn_features)


In [None]:
# Two-column submission table: each test ID alongside its predicted score.
submission = pd.DataFrame(data={'ID': test_data['ID'], 'score': test_predictions})

In [None]:
# Write the submission without the DataFrame index column.
submission.to_csv('submission.csv', index=False)

In [None]:
# Confirmation message for the reader; it does not itself check that the
# file was written.
print("Test predictions saved to 'submission.csv'")

Test predictions saved to 'submission.csv'
