#  Data Modeling: Neural Network

## 1. Set up environment

In [None]:
!pip install tensorflow

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.metrics import mean_squared_error
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
import matplotlib.pyplot as plt

ModuleNotFoundError: No module named 'tensorflow'

In [None]:
# Read the processed dataset from disk
data = pd.read_csv('D:/KHTN/NMKHDL/hcmus_nmkhdl_project/data/processed/data.csv')

# Feature groups used by the preprocessing step
numerical_features = ['tomatometer_score', 'tomatometer_count', 'audience_count', 'runtime']
categorical_features = ['genre', 'classification', 'original_language']

# Model inputs: the numerical columns followed by the categorical ones
selected_features = numerical_features + categorical_features

# Drop rows with a missing value in any numerical feature or in the target
# (imputing with fillna() would be an alternative strategy)
required_columns = numerical_features + ['audience_score']
data = data.dropna(subset=required_columns)

# Feature matrix and target vector
X = data[selected_features]
y = data['audience_score']

## 2. Transformers 

In [None]:
# Numerical columns: standardize with StandardScaler.
numerical_transformer = Pipeline(steps=[
    ('scaler', StandardScaler())
])

# Categorical columns: dense one-hot encoding.
# The `sparse` keyword was renamed to `sparse_output` in scikit-learn 1.2 and the
# old name was removed in 1.4, so prefer the new keyword and fall back to the old
# one only when the installed release does not know it yet.
# NOTE: with drop='first' and handle_unknown='ignore', a category that was not seen
# during fit is encoded as all zeros, i.e. the same encoding as the dropped category.
onehot_options = {'drop': 'first', 'handle_unknown': 'ignore'}
try:
    onehot_encoder = OneHotEncoder(sparse_output=False, **onehot_options)
except TypeError:
    onehot_encoder = OneHotEncoder(sparse=False, **onehot_options)

categorical_transformer = Pipeline(steps=[
    ('onehot', onehot_encoder)
])

# Combine transformers into a ColumnTransformer: each transformer is applied only
# to its own list of columns.
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numerical_transformer, numerical_features),
        ('cat', categorical_transformer, categorical_features)
    ])

In [None]:
# Hold out 20% of the rows for testing; the fixed random_state keeps the split repeatable
split_parts = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split_parts

# Learn the scaling statistics and category levels from the training rows only
preprocessor.fit(X_train)

## 3. Neural Network Model

In [None]:
# Number of network inputs = column count of the preprocessed training matrix
n_inputs = preprocessor.transform(X_train).shape[1]

# Fully connected regression network: 64 -> 32 -> 1 (linear output)
network = Sequential([
    Dense(64, activation='relu', input_dim=n_inputs),
    Dense(32, activation='relu'),
    Dense(1, activation='linear'),
])

# Bundle the preprocessor and the network as named pipeline steps
modelNN = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('regressor', network),
])

In [None]:
# Compile the Keras model (Adam optimizer, MSE loss)
regressor = modelNN.named_steps['regressor']
regressor.compile(optimizer='adam', loss='mean_squared_error')

# Preprocess each split once and reuse the resulting matrices
X_train_prepared = preprocessor.transform(X_train)
X_test_prepared = preprocessor.transform(X_test)

# Train the model; 20% of the training rows are held out for validation
history = regressor.fit(
    X_train_prepared,
    y_train,
    epochs=50,
    batch_size=32,
    verbose=1,
    validation_split=0.2,
)

# Evaluate through the same explicit preprocess -> network path used for training.
# The sklearn Pipeline itself is never fitted (only its two steps are), and
# scikit-learn checks the pipeline's fitted state when Pipeline.predict is called,
# so predicting via `modelNN.predict` is fragile across scikit-learn versions.
# The network ends in Dense(1), so flatten the trailing size-1 axis to align the
# predictions with the 1-D target.
y_pred = regressor.predict(X_test_prepared).ravel()
mse = mean_squared_error(y_test, y_pred)

print(f'Mean Square Error: {mse}')

## 4. Training Process

In [None]:
# Plot the training and validation loss curves recorded for each epoch
fig, ax = plt.subplots()
loss_curves = [('loss', 'Training Loss'), ('val_loss', 'Validation Loss')]
for history_key, curve_label in loss_curves:
    ax.plot(history.history[history_key], label=curve_label)
ax.set_xlabel('Epoch')
ax.set_ylabel('Mean Squared Error')
ax.set_title('Training Process of Neural Network')
ax.legend()
plt.show()