In [64]:
import numpy as np
import pandas as pd

In [65]:
data = {
    "UserId": [],
    "Image":[],
    "Seller": [],
    "misspell": [],
    "Review": [],
    "Rating": []
}

In [66]:
dummy_data = [
    (1001, 1, 25, 0, "Great product, exceeded my expectations!", 5),
    (1002, 0, 10, 1, "Disappointed with the quality, won't buy again.", 2),
    (1003, 1, 50, 0, "Fast shipping and excellent service!", 4),
    (1004, 1, 12, 1, "The item arrived damaged, poor packaging.", 1),
    (1005, 0, 35, 0, "Amazing value for the price, highly recommended!", 5),
    (1006, 1, 18, 1, "Product is not as described, misleading information.", 2),
    (1007, 0, 40, 0, "Average experience, nothing special.", 3),
    (1008, 1, 30, 0, "Excellent customer support, resolved my issue promptly.", 4),
    (1009, 0, 15, 1, "Poorly made product, regret the purchase.", 1),
    (1010, 1, 22, 1, "Very satisfied with the purchase, good quality.", 5),
    (1011, 0, 28, 1, "Not satisfied with the purchase, poor quality.", 2),
    (1012, 1, 15, 0, "Quick delivery, but the product didn't meet expectations.", 3),
    (1013, 0, 38, 0, "Highly recommended, great value for money!", 5),
    (1014, 1, 20, 1, "Received the wrong item, frustrating experience.", 1),
    (1015, 0, 45, 0, "Good product, reasonable price.", 4),
    (1016, 1, 12, 1, "Misleading advertising, not as described.", 2),
    (1017, 0, 33, 0, "Average quality, nothing exceptional.", 3),
    (1018, 1, 26, 0, "Excellent product, exceeded my expectations!", 5),
    (1019, 0, 18, 1, "Product arrived late, not happy with the service.", 1),
    (1020, 1, 30, 0, "Very pleased with the purchase, great customer service.", 4),
    (1021, 0, 22, 0, "Average shipping time, good product overall.", 4),
    (1022, 1, 42, 1, "Defective item, disappointed with the quality.", 2),
    (1023, 0, 19, 0, "Smooth transaction, would buy again.", 5),
    (1024, 1, 34, 0, "Quality could be better, but decent for the price.", 3),
    (1025, 0, 16, 1, "Poor customer service, unresolved issue.", 1),
    (1026, 1, 25, 0, "Impressed with the product, quick delivery.", 5),
    (1027, 0, 37, 1, "Misleading description, not what I expected.", 2),
    (1028, 1, 14, 0, "Satisfied with the purchase, good value.", 4),
    (1029, 0, 29, 0, "No issues with the product, smooth transaction.", 5),
    (1030, 1, 24, 1, "Received damaged item, poor packaging.", 1),
]

In [67]:
for i in dummy_data:
    data["UserId"].append(i[0]% 5)
    data["Image"].append(i[1])
    data["Seller"].append(i[2])
    data["misspell"].append(i[3])
    data["Review"].append(i[4])
    data["Rating"].append(i[5])

In [68]:
df = pd.DataFrame(data)

In [69]:
df.head()

Unnamed: 0,UserId,Image,Seller,misspell,Review,Rating
0,1,1,25,0,"Great product, exceeded my expectations!",5
1,2,0,10,1,"Disappointed with the quality, won't buy again.",2
2,3,1,50,0,Fast shipping and excellent service!,4
3,4,1,12,1,"The item arrived damaged, poor packaging.",1
4,0,0,35,0,"Amazing value for the price, highly recommended!",5


In [70]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
from sklearn.linear_model import ElasticNet
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import RandomizedSearchCV

# Assuming df is your DataFrame with columns 'UserId', 'Image', 'Seller', 'misspell', 'Review', 'Rating'
X = df[['UserId', 'Image', 'Seller', 'misspell', 'Review']]
y = df['Rating']

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize numerical features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train[['UserId', 'Seller', 'Image', 'misspell']])
X_test_scaled = scaler.transform(X_test[['UserId', 'Seller', 'Image', 'misspell']])

# TextVectorization layer for 'Review'
max_words = 1000  # Adjust as needed
vectorizer = CountVectorizer(max_features=max_words, stop_words='english')
X_train_text = vectorizer.fit_transform(X_train['Review'])
X_test_text = vectorizer.transform(X_test['Review'])

# Combine numerical features and text features
X_train_combined = pd.concat([pd.DataFrame(X_train_scaled), pd.DataFrame(X_train_text.toarray())], axis=1)
X_test_combined = pd.concat([pd.DataFrame(X_test_scaled), pd.DataFrame(X_test_text.toarray())], axis=1)

param_grid = {
    'alpha': [0.001, 0.01, 0.1, 1, 10],
    'l1_ratio': [0.1, 0.3, 0.5, 0.7, 0.9]
}

elastic_net = ElasticNet()

random_search = RandomizedSearchCV(elastic_net, param_distributions=param_grid, n_iter=10, scoring='neg_mean_squared_error', cv=5)
random_search.fit(X_train_combined, y_train)

best_alpha = random_search.best_params_['alpha']
best_l1_ratio = random_search.best_params_['l1_ratio']


# Create a new ElasticNet model with the best hyperparameters
best_elastic_net = ElasticNet(alpha=best_alpha, l1_ratio=best_l1_ratio)
best_elastic_net.fit(X_train_combined, y_train)

# Make predictions using the best model
y_pred_train = best_elastic_net.predict(X_train_combined)
y_pred_test = best_elastic_net.predict(X_test_combined)


# Print predictions for the first 10 samples in the training set
print("Predictions on Training Set: {}".format(y_pred_train[:10]))

# Print predictions for the first 10 samples in the test set
print("Predictions on Test Set: {}".format(y_pred_test[:10]))

# Evaluate the model
train_mse = mean_squared_error(y_train, y_pred_train)
test_mse = mean_squared_error(y_test, y_pred_test)

print("Training Mean Squared Error: {}".format(train_mse))
print("Test Mean Squared Error: {}".format(test_mse))

print("Training Root Mean Squared Error: {}".format(np.sqrt(train_mse)))
print("Test Root Mean Squared Error: {}".format(np.sqrt(test_mse)))

  model = cd_fast.enet_coordinate_descent(


Predictions on Training Set: [4.90351804 1.11556258 4.99031846 4.85114781 4.87180944 3.17501393
 1.98455054 1.04802428 3.18435892 4.93388854]
Predictions on Test Set: [4.39353037 1.97731086 4.20109727 4.47145644 1.86584843 2.05700813]
Training Mean Squared Error: 0.011066503436298253
Test Mean Squared Error: 1.8813780937924047
Training Root Mean Squared Error: 0.10519744976138087
Test Root Mean Squared Error: 1.3716333671183436


In [71]:
np.round(y_pred_test)

array([4., 2., 4., 4., 2., 2.])

In [72]:
y_test

27    4
15    2
23    3
17    5
8     1
9     5
Name: Rating, dtype: int64

In [73]:
model.save("Model.h5")

In [74]:
import joblib

In [76]:
!pip show keras

Name: keras
Version: 3.0.5
Summary: Multi-backend Keras.
Home-page: https://github.com/keras-team/keras
Author: Keras team
Author-email: keras-users@googlegroups.com
License: Apache License 2.0
Location: /opt/conda/lib/python3.10/site-packages
Requires: absl-py, dm-tree, h5py, ml-dtypes, namex, numpy, rich
Required-by: keras-tuner, tensorflow
