In [26]:
# خلية 1: استيراد المكتبات
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor, HistGradientBoostingClassifier
from sklearn.neighbors import NearestNeighbors
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
import joblib

In [27]:
data = pd.read_csv('data/steam_games.csv')

In [28]:
data.head()

Unnamed: 0,steam_appid,name,developers,publishers,categories,genres,required_age,n_achievements,platforms,is_released,...,additional_content,total_reviews,total_positive,total_negative,review_score,review_score_desc,positive_percentual,metacritic,is_free,price_initial (USD)
0,2719580,勇者の伝説の勇者,['ぽけそう'],['ぽけそう'],"['Single-player', 'Family Sharing']","['Casual', 'Indie']",0,0,['windows'],True,...,[],0,0,0,0.0,No user reviews,0.0,0,False,0.99
1,2719590,Light No Fire,['Hello Games'],['Hello Games'],"['Single-player', 'Multi-player', 'Co-op', 'On...","['Action', 'Adventure']",0,0,['windows'],False,...,[],0,0,0,0.0,No user reviews,0.0,0,False,0.0
2,2719600,Lorhaven: Cursed War,['GoldenGod Games'],['GoldenGod Games'],"['Single-player', 'Multi-player', 'PvP', 'Shar...","['RPG', 'Strategy']",0,32,"['windows', 'mac']",True,...,[],9,8,1,0.0,9 user reviews,88.9,0,False,9.99
3,2719610,PUIQ: Demons,['Giammnn'],['Giammnn'],"['Single-player', 'Steam Achievements', 'Famil...","['Action', 'Casual', 'Indie', 'RPG']",0,28,['windows'],True,...,[],0,0,0,0.0,No user reviews,0.0,0,False,2.99
4,2719650,Project XSTING,['Saucy Melon'],['Saucy Melon'],"['Single-player', 'Steam Achievements', 'Steam...","['Action', 'Casual', 'Indie', 'Early Access']",0,42,['windows'],True,...,[],9,9,0,0.0,9 user reviews,100.0,0,False,7.99


In [29]:
data.shape

(71429, 21)

In [30]:
# معالجة القيم المفقودة
data.fillna({'total_reviews': 0, 'total_positive': 0, 'total_negative': 0, 'n_achievements': 0}, inplace=True)

In [31]:
# حساب عمر اللعبة
data['release_date'] = pd.to_datetime(data['release_date'], errors='coerce')
data['age'] = (pd.Timestamp.now() - data['release_date']).dt.days / 365

In [32]:
# خلية 3: تحويل الأنواع إلى متجهات باستخدام OneHotEncoder
data['genres'] = data['genres'].fillna('').apply(lambda x: x.split(','))

In [33]:
# معالجة الأنواع
data['genres'] = data['genres'].apply(lambda x: ','.join(x) if isinstance(x, list) else x)  # تحويل القوائم إلى سلاسل
data['genres'] = data['genres'].fillna('')  # معالجة القيم المفقودة

In [34]:
mlb = OneHotEncoder() 

In [35]:
genres_encoded = mlb.fit_transform(data['genres'].apply(lambda x: ','.join(x)).values.reshape(-1, 1)).toarray()

In [36]:
#  التنبؤ بتقييمات اللعبة
X_rating = data[['total_reviews', 'total_positive', 'total_negative', 'n_achievements', 'required_age']]
y_rating = data['positive_percentual']

In [37]:
model_rf = RandomForestRegressor(random_state=42)
model_rf.fit(X_rating, y_rating)

In [38]:
# خلية 5: التنبؤ بعمر اللعبة قبل أن تصبح غير شائعة
threshold = data['positive_percentual'].median()
data['is_popular'] = data['positive_percentual'].apply(lambda x: 1 if x > threshold else 0)

In [39]:
X_popularity = data[['age', 'total_reviews', 'total_positive', 'total_negative', 'n_achievements']]
y_popularity = data['is_popular']

In [40]:
model_hgb = HistGradientBoostingClassifier(random_state=42)
model_hgb.fit(X_popularity, y_popularity)

In [41]:
# خلية 6: نظام التوصيات باستخدام KNN
numeric_features = ['total_reviews', 'total_positive', 'total_negative', 'n_achievements']
categorical_features = ['genres']

In [42]:
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numeric_features),
        ('cat', OneHotEncoder(), categorical_features)
    ])

In [43]:
pipeline_knn = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('knn', NearestNeighbors(n_neighbors=6))  # 6 لأن الجار الأول هو اللعبة نفسها
])

In [44]:
X_recommendation = data[['total_reviews', 'total_positive', 'total_negative', 'n_achievements', 'genres']]
pipeline_knn.fit(X_recommendation)

In [45]:
# تحديد المدخلات
X_recommendation = data[['total_reviews', 'total_positive', 'total_negative', 'n_achievements', 'genres']]

In [46]:
# تدريب النموذج
pipeline_knn.fit(X_recommendation)

In [47]:
joblib.dump(model_rf, 'models/rating_prediction_model.pkl')  # RandomForestRegresso
if hasattr(model_hgb, 'classes_'):
    print("النموذج مدرب.")
else:
    print("النموذج غير مدرب.")

النموذج مدرب.


In [48]:
joblib.dump(model_hgb, 'models/popularity_prediction_model.pkl')  # HistGradientBoostingClassifier
if hasattr(model_hgb, 'classes_'):
    print("النموذج مدرب.")
else:
    print("النموذج غير مدرب.")

النموذج مدرب.


In [49]:
joblib.dump(mlb, 'models/genres_encoder.pkl')  # OneHotEncoder
if hasattr(model_hgb, 'classes_'):
    print("النموذج مدرب.")
else:
    print("النموذج غير مدرب.")

النموذج مدرب.


In [50]:
joblib.dump(pipeline_knn, 'models/recommendation_model.pkl')  # KNN
if hasattr(model_hgb, 'classes_'):
    print("النموذج مدرب.")
else:
    print("النموذج غير مدرب.")

النموذج مدرب.


In [51]:
print(mlb.categories_)

[array(["[,',A,c,c,o,u,n,t,i,n,g,',,, ,',A,n,i,m,a,t,i,o,n, ,&, ,M,o,d,e,l,i,n,g,',,, ,',A,u,d,i,o, ,P,r,o,d,u,c,t,i,o,n,',,, ,',D,e,s,i,g,n, ,&, ,I,l,l,u,s,t,r,a,t,i,o,n,',,, ,',E,d,u,c,a,t,i,o,n,',,, ,',P,h,o,t,o, ,E,d,i,t,i,n,g,',,, ,',S,o,f,t,w,a,r,e, ,T,r,a,i,n,i,n,g,',,, ,',U,t,i,l,i,t,i,e,s,',,, ,',V,i,d,e,o, ,P,r,o,d,u,c,t,i,o,n,',,, ,',W,e,b, ,P,u,b,l,i,s,h,i,n,g,',,, ,',G,a,m,e, ,D,e,v,e,l,o,p,m,e,n,t,',]",
       "[,',A,c,c,o,u,n,t,i,n,g,',,, ,',A,n,i,m,a,t,i,o,n, ,&, ,M,o,d,e,l,i,n,g,',,, ,',A,u,d,i,o, ,P,r,o,d,u,c,t,i,o,n,',,, ,',D,e,s,i,g,n, ,&, ,I,l,l,u,s,t,r,a,t,i,o,n,',,, ,',E,d,u,c,a,t,i,o,n,',,, ,',P,h,o,t,o, ,E,d,i,t,i,n,g,',,, ,',S,o,f,t,w,a,r,e, ,T,r,a,i,n,i,n,g,',,, ,',U,t,i,l,i,t,i,e,s,',,, ,',V,i,d,e,o, ,P,r,o,d,u,c,t,i,o,n,',,, ,',W,e,b, ,P,u,b,l,i,s,h,i,n,g,',]",
       "[,',A,c,c,o,u,n,t,i,n,g,',,, ,',E,d,u,c,a,t,i,o,n,',,, ,',S,o,f,t,w,a,r,e, ,T,r,a,i,n,i,n,g,',,, ,',U,t,i,l,i,t,i,e,s,',,, ,',E,a,r,l,y, ,A,c,c,e,s,s,',]",
       ..., "[,',V,i,o,l,e,n,t,',]"