In [18]:
from typing import Dict, List
import warnings
warnings.filterwarnings("ignore", message="A parameter name that contains")
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import re
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.tokenize import word_tokenize, sent_tokenize
from nltk.stem import WordNetLemmatizer
from nltk.sentiment import SentimentIntensityAnalyzer
from wordcloud import WordCloud
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, f1_score
from imblearn.over_sampling import SMOTE
import textstat
import joblib

# Download necessary NLTK resources
nltk.download('punkt')
nltk.download('stopwords')
nltk.download('wordnet')
nltk.download('vader_lexicon')
nltk.download('averaged_perceptron_tagger_eng')
nltk.download('omw-1.4')

[nltk_data] Downloading package punkt to C:\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to C:\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to C:\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package vader_lexicon to C:\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger_eng to
[nltk_data]     C:\nltk_data...
[nltk_data]   Package averaged_perceptron_tagger_eng is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package omw-1.4 to C:\nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!


True

In [19]:
def load_and_prepare_data(path: str) -> pd.DataFrame:
    """
    Load dataset and rename columns.

    Args:
        path (str): File path to CSV dataset.

    Returns:
        pd.DataFrame: Prepared dataframe with columns ['text', 'label'].
    """
    data = pd.read_csv(path)
    data.rename(columns={'sentence': 'text', 'emotion': 'label'}, inplace=True)
    return data

data = load_and_prepare_data(r"D:\Grad_Proj\project\combined_emotion.csv")
data

Unnamed: 0,text,label
0,i just feel really helpless and heavy hearted,fear
1,ive enjoyed being able to slouch about relax a...,sad
2,i gave up my internship with the dmrg and am f...,fear
3,i dont know i feel so lost,sad
4,i am a kindergarten teacher and i am thoroughl...,fear
...,...,...
422741,i begun to feel distressed for you,fear
422742,i left feeling annoyed and angry thinking that...,anger
422743,i were to ever get married i d have everything...,joy
422744,i feel reluctant in applying there because i w...,fear


In [20]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 422746 entries, 0 to 422745
Data columns (total 2 columns):
 #   Column  Non-Null Count   Dtype 
---  ------  --------------   ----- 
 0   text    422746 non-null  object
 1   label   422746 non-null  object
dtypes: object(2)
memory usage: 6.5+ MB


In [21]:
data['No_of_Chars'] = data['text'].apply(len)
data['No_of_Words'] = data.apply(lambda row: nltk.word_tokenize(row['text']), axis= 1).apply(len)
data['No_of_Sents'] = data.apply(lambda row: nltk.sent_tokenize(row['text']), axis= 1).apply(len)

data.describe()

Unnamed: 0,No_of_Chars,No_of_Words,No_of_Sents
count,422746.0,422746.0,422746.0
mean,97.03398,19.220179,1.0
std,56.198156,11.057121,0.0
min,2.0,1.0,1.0
25%,54.0,11.0,1.0
50%,86.0,17.0,1.0
75%,128.0,25.0,1.0
max,830.0,178.0,1.0


In [22]:
def encode_labels(df: pd.DataFrame, col: str) -> (pd.DataFrame, LabelEncoder):
    """
    Encode categorical labels to integers.

    Args:
        df (pd.DataFrame): Input dataframe.
        col (str): Column name of labels.

    Returns:
        tuple: Dataframe with encoded labels, fitted LabelEncoder.
    """
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    return df, le

data, le = encode_labels(data, 'label')


In [23]:
""" cols_color = ['black', 'blue', 'red', 'green', 'purple', 'cyan']
plt.figure(figsize=(12,8))
fg = sns.countplot(x= data['label'], palette= cols_color)
fg.set_title('count plot of classes')
fg.set_xlabel('classes')
fg.set_ylabel('count of classes') """

" cols_color = ['black', 'blue', 'red', 'green', 'purple', 'cyan']\nplt.figure(figsize=(12,8))\nfg = sns.countplot(x= data['label'], palette= cols_color)\nfg.set_title('count plot of classes')\nfg.set_xlabel('classes')\nfg.set_ylabel('count of classes') "

In [24]:
""" plt.figure(figsize=(12,8))
fg = sns.pairplot(data= data, hue= 'label', palette= cols_color)
plt.show(fg) """

" plt.figure(figsize=(12,8))\nfg = sns.pairplot(data= data, hue= 'label', palette= cols_color)\nplt.show(fg) "

In [25]:
lemmatizer = WordNetLemmatizer()
stop_words = set(stopwords.words('english'))

In [26]:
from nltk.corpus import wordnet

def get_wordnet_pos(tag):
    """
    تحويل POS tag من شكل NLTK إلى الشكل المطلوب من WordNetLemmatizer.
    """
    if tag.startswith('J'):
        return wordnet.ADJ
    elif tag.startswith('V'):
        return wordnet.VERB
    elif tag.startswith('N'):
        return wordnet.NOUN
    elif tag.startswith('R'):
        return wordnet.ADV
    else:
        return wordnet.NOUN  # الافتراضي




In [27]:
def preprocess_text(text: str) -> str:
    """
    تنظيف النص مع تحسين lemmatization باستخدام POS tagging.
    """
    text = text.lower()
    text = re.sub(r'[^a-z\s]', '', text)
    tokens = word_tokenize(text)
    pos_tags = nltk.pos_tag(tokens)
    lemmatized_words = [
        lemmatizer.lemmatize(word, get_wordnet_pos(tag))
        for word, tag in pos_tags
        if word not in stop_words
    ]
    return ' '.join(lemmatized_words)

data['lemmatized_words'] = data['text'].apply(preprocess_text)

In [28]:
""" cols_color = ['black', 'blue', 'red', 'green', 'purple', 'cyan']
plt.figure(figsize=(12,8))
fg = sns.countplot(x= data['label'], palette= cols_color)
fg.set_title('count plot of classes')
fg.set_xlabel('classes')
fg.set_ylabel('count of classes') """

" cols_color = ['black', 'blue', 'red', 'green', 'purple', 'cyan']\nplt.figure(figsize=(12,8))\nfg = sns.countplot(x= data['label'], palette= cols_color)\nfg.set_title('count plot of classes')\nfg.set_xlabel('classes')\nfg.set_ylabel('count of classes') "

In [29]:
""" all_words = " ".join(sentence for sentence in data['lemmatized_words'])
all_words

wordcloud = WordCloud(width=800, height=500, random_state=42, max_font_size=100).generate(all_words)

plt.figure(figsize=(12,8))
plt.imshow(wordcloud, interpolation='bilinear')
plt.axis('off')
plt.show() """

' all_words = " ".join(sentence for sentence in data[\'lemmatized_words\'])\nall_words\n\nwordcloud = WordCloud(width=800, height=500, random_state=42, max_font_size=100).generate(all_words)\n\nplt.figure(figsize=(12,8))\nplt.imshow(wordcloud, interpolation=\'bilinear\')\nplt.axis(\'off\')\nplt.show() '

In [30]:
tfidf = TfidfVectorizer(max_features=1000, max_df=0.9, min_df=2, stop_words='english', ngram_range=(1, 2))
sia = SentimentIntensityAnalyzer()

def vectorize_text(df: pd.DataFrame, text_col: str, vectorizer: TfidfVectorizer):
    """
    Vectorize text column using TF-IDF.

    Args:
        df (pd.DataFrame): Dataframe containing text data.
        text_col (str): Name of the column with preprocessed text.
        vectorizer (TfidfVectorizer): Initialized vectorizer.

    Returns:
        sparse matrix: TF-IDF features matrix.
    """
    X = vectorizer.fit_transform(df[text_col])
    return X

sampled_data = data.sample(frac=0.3, random_state=42)
X = vectorize_text(data, 'lemmatized_words', tfidf)
y = data['label']


In [31]:
""" def extract_additional_features(text: str) -> list:
    words = word_tokenize(text)
    num_words = len(words)
    num_chars = len(text)
    sentiment = sia.polarity_scores(text)['compound']
    readability = textstat.flesch_reading_ease(text)
    return [num_words, num_chars, sentiment, readability]

additional_features = np.array([extract_additional_features(text) for text in data['lemmatized_words']])
scaler = StandardScaler()
additional_features_scaled = scaler.fit_transform(additional_features) """

" def extract_additional_features(text: str) -> list:\n    words = word_tokenize(text)\n    num_words = len(words)\n    num_chars = len(text)\n    sentiment = sia.polarity_scores(text)['compound']\n    readability = textstat.flesch_reading_ease(text)\n    return [num_words, num_chars, sentiment, readability]\n\nadditional_features = np.array([extract_additional_features(text) for text in data['lemmatized_words']])\nscaler = StandardScaler()\nadditional_features_scaled = scaler.fit_transform(additional_features) "

In [32]:
""" from scipy.sparse import hstack
from scipy.sparse import csr_matrix

X_combined = hstack([X, csr_matrix(additional_features_scaled)])
 """

' from scipy.sparse import hstack\nfrom scipy.sparse import csr_matrix\n\nX_combined = hstack([X, csr_matrix(additional_features_scaled)])\n '

In [33]:
def extract_pos_tags(text):
    tokens = nltk.word_tokenize(text)
    pos_tags = nltk.pos_tag(tokens)
    return ' '.join(tag for word, tag in pos_tags)

data['pos_tags'] = data['lemmatized_words'].apply(extract_pos_tags)

tfidf_pos = TfidfVectorizer()
X_pos = tfidf_pos.fit_transform(data['pos_tags'])
data['pos_tags']

0                                           NN RB JJ NN VBD
1         JJ NN JJ JJ NN IN JJ NN JJ NN RB VBP JJ NN RB ...
2                                            VB NN NN NN NN
3                                              NN VBP NN VB
4         VB PRP RB JJ NN VB NN NN NN NN NN NN VBP NN NN...
                                ...                        
422741                                            VB NNS JJ
422742                              VB NN IN JJ NN NN JJ NN
422743    RB VB JJ NN NN VBP VB RB VBP RB RB VBP JJ NN N...
422744                 NN JJ NN VBP JJ VBP NN VBP JJS CD NN
422745                                    JJ NN NN IN NN NN
Name: pos_tags, Length: 422746, dtype: object

In [34]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y, random_state=42)

smote = SMOTE(random_state=42)
X_train_balanced, y_train_balanced = smote.fit_resample(X_train, y_train)

print("Before SMOTE:", np.bincount(y_train))
print("After SMOTE:", np.bincount(y_train_balanced))


Before SMOTE: [ 47454  39719 114453  27643  96949  11978]
After SMOTE: [114453 114453 114453 114453 114453 114453]


In [35]:
from collections import Counter
print("Before SMOTE:", Counter(y_train))
print("After SMOTE:", Counter(y_train_balanced))


Before SMOTE: Counter({2: 114453, 4: 96949, 0: 47454, 1: 39719, 3: 27643, 5: 11978})
After SMOTE: Counter({4: 114453, 1: 114453, 2: 114453, 0: 114453, 3: 114453, 5: 114453})


In [36]:
row = X[0].toarray().flatten()
for i, val in enumerate(row):
    if val != 0:
        print(f'index {i}: {val}')


index 262: 0.1263050337217284
index 350: 0.5806697162326915
index 478: 0.6752847771992059
index 739: 0.4368754843085354


In [37]:
def train_and_evaluate(X_train, y_train, X_test, y_test):
    """
    Train Random Forest and evaluate on test set.

    Args:
        X_train, y_train: Training features and labels.
        X_test, y_test: Testing features and labels.

    Returns:
        model: Trained Random Forest model.
    """

    
    model = RandomForestClassifier(random_state=42)
    model.fit(X_train, y_train)
    
    
    """  param_grid = {
    'n_estimators': [100, 200, 300],           # عدد الأشجار
    'max_depth': [None, 10, 20, 30],           # أقصى عمق للشجرة
    'min_samples_split': [2, 5, 10]             # أقل عدد عينات لتقسيم عقدة
    } """
    """  
    grid_search = GridSearchCV(estimator=model, param_grid=param_grid,cv=3, n_jobs=-1, verbose=2, scoring='f1_weighted')
    grid_search.fit(X_train_balanced, y_train_balanced)
    
    print("أفضل المعاملات:", grid_search.best_params_)
    print("أفضل نتيجة F1 (weighted):", grid_search.best_score_)
    
    best_rf_model = grid_search.best_estimator_ """

    y_pred = model.predict(X_test)
    print(f"Accuracy: {accuracy_score(y_test, y_pred):.4f}")
    print(f"Test set F1 Score (weighted): {f1_score(y_test, y_pred, average='weighted'):.4f}")
    print(classification_report(y_test, y_pred))
    print(classification_report(y_test, y_pred, target_names=le.classes_))
    
    """ cm = confusion_matrix(y_test, y_pred)
    plt.figure(figsize=(8,6))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
    plt.title('Confusion Matrix')
    plt.xlabel('Predicted')
    plt.ylabel('True')
    plt.show() """
    
    print("F1 Score:", f1_score(y_test, y_pred, average='weighted'))
    return model

random_forest_model = train_and_evaluate(X_train_balanced, y_train_balanced, X_test, y_test)


Accuracy: 0.7645
Test set F1 Score (weighted): 0.7672
              precision    recall  f1-score   support

           0       0.72      0.78      0.75     11863
           1       0.71      0.71      0.71      9930
           2       0.82      0.79      0.80     28614
           3       0.57      0.65      0.61      6911
           4       0.86      0.80      0.83     24238
           5       0.46      0.55      0.50      2994

    accuracy                           0.76     84550
   macro avg       0.69      0.72      0.70     84550
weighted avg       0.77      0.76      0.77     84550

              precision    recall  f1-score   support

       anger       0.72      0.78      0.75     11863
        fear       0.71      0.71      0.71      9930
         joy       0.82      0.79      0.80     28614
        love       0.57      0.65      0.61      6911
         sad       0.86      0.80      0.83     24238
     suprise       0.46      0.55      0.50      2994

    accuracy           

comparison between models


In [38]:
""" models = {
    'Random Forest': RandomForestClassifier(),
    'Naive Bayes': MultinomialNB(),
    'SVM': SVC(),
    'KNN': KNeighborsClassifier()
}

for name, model in models.items():
    model.fit(X_train_balanced, y_train_balanced)
    y_pred = model.predict(X_test)
    print(f"{name} Accuracy: {accuracy_score(y_test, y_pred):.2f}")
    print(f"{name} F1 Score: {f1_score(y_test, y_pred, average='weighted'):.2f}")
    print("-" * 30)
 """

' models = {\n    \'Random Forest\': RandomForestClassifier(),\n    \'Naive Bayes\': MultinomialNB(),\n    \'SVM\': SVC(),\n    \'KNN\': KNeighborsClassifier()\n}\n\nfor name, model in models.items():\n    model.fit(X_train_balanced, y_train_balanced)\n    y_pred = model.predict(X_test)\n    print(f"{name} Accuracy: {accuracy_score(y_test, y_pred):.2f}")\n    print(f"{name} F1 Score: {f1_score(y_test, y_pred, average=\'weighted\'):.2f}")\n    print("-" * 30)\n '

In [56]:
import joblib

joblib.dump(random_forest_model, './models/random_forest_model.pkl')
joblib.dump(tfidf, './models/tfidf_vectorizer.pkl')
joblib.dump(le, './models/label_encoder.pkl')


['./models/label_encoder.pkl']

In [41]:
def predict_emotion(text, model, vectorizer, label_encoder):
    processed = preprocess_text(text)
    
    vect_text = vectorizer.transform([processed])
    
    prediction = model.predict(vect_text)
    return label_encoder.inverse_transform(prediction)[0]

print(predict_emotion("I feel so happy and joyful today!", random_forest_model, tfidf, le))


joy


In [42]:
from collections import Counter
print("Before SMOTE:", Counter(y_train))
print("After SMOTE:", Counter(y_train_balanced))


Before SMOTE: Counter({2: 114453, 4: 96949, 0: 47454, 1: 39719, 3: 27643, 5: 11978})
After SMOTE: Counter({4: 114453, 1: 114453, 2: 114453, 0: 114453, 3: 114453, 5: 114453})


In [43]:
books_db = pd.read_csv("books.csv", index_col='book_id')
books_db = books_db[['original_title', 'authors']]
books_db.rename(columns={'original_title': 'title'}, inplace=True)
books_db




Unnamed: 0_level_0,title,authors
book_id,Unnamed: 1_level_1,Unnamed: 2_level_1
2767052,The Hunger Games,Suzanne Collins
3,Harry Potter and the Philosopher's Stone,"J.K. Rowling, Mary GrandPré"
41865,Twilight,Stephenie Meyer
2657,To Kill a Mockingbird,Harper Lee
4671,The Great Gatsby,F. Scott Fitzgerald
...,...,...
7130616,Bayou Moon,Ilona Andrews
208324,Means of Ascent,Robert A. Caro
77431,The Mauritius Command,Patrick O'Brian
8565083,Cinderella Ate My Daughter: Dispatches from th...,Peggy Orenstein


In [44]:
books_db.info()

<class 'pandas.core.frame.DataFrame'>
Index: 10000 entries, 2767052 to 8914
Data columns (total 2 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   title    9415 non-null   object
 1   authors  10000 non-null  object
dtypes: object(2)
memory usage: 234.4+ KB


In [45]:
books_db.dropna(inplace=True)

In [46]:
articles_db = pd.read_csv("articles.csv", index_col='id')  # يحتوي على title, link
articles_db


Unnamed: 0_level_0,url,title
id,Unnamed: 1_level_1,Unnamed: 2_level_1
1,https://towardsdatascience.com/not-all-rainbow...,Not All Rainbows and Sunshine: The Darker Side...
2,https://towardsdatascience.com/ethics-in-ai-po...,Ethics in AI: Potential Root Causes for Biased...
3,https://towardsdatascience.com/python-tuple-th...,"Python Tuple, The Whole Truth and Only the Tru..."
4,https://towardsdatascience.com/dates-and-subqu...,Dates and Subqueries in SQL
5,https://towardsdatascience.com/temporal-differ...,Temporal Differences with Python: First Sample...
...,...,...
2494,https://medium.com/swlh/brian-chesky-is-an-exa...,Brian Chesky is an Example of What it Means to...
2495,https://medium.com/swlh/5-red-flags-of-online-...,5 Red Flags of Online Business Gurus
2496,https://writingcooperative.com/recognizing-the...,Recognizing These Three Realities Can Help Set...
2497,https://writingcooperative.com/i-remember-it-l...,“I Remember It Like It Was Just Yesterday…” Re...


In [47]:
articles_db.info()

<class 'pandas.core.frame.DataFrame'>
Index: 2498 entries, 1 to 2498
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   url     2498 non-null   object
 1   title   2498 non-null   object
dtypes: object(2)
memory usage: 58.5+ KB


In [58]:
model = joblib.load("./models/random_forest_model.pkl")
vectorizer = joblib.load("./models/tfidf_vectorizer.pkl")

In [60]:
from sklearn.base import BaseEstimator

def classify_emotions(
    df: pd.DataFrame,
    text_column: str,
    model: BaseEstimator,
    vectorizer: TfidfVectorizer,
    le: LabelEncoder,
    positive_emotions: List[str] = ["joy", "love", "surprise"],
    save_path: str = None
) -> pd.DataFrame:
    """
    يصنف المشاعر في عمود نصي ويُرجع فقط الصفوف ذات المشاعر الإيجابية.

    Args:
        df (pd.DataFrame): داتا فريم يحتوي على النصوص.
        text_column (str): اسم العمود الذي يحتوي على النصوص (مثل: "title" أو "book_title").
        model (BaseEstimator): نموذج تصنيف مدرب.
        vectorizer (TfidfVectorizer): محول TF-IDF.
        scaler (StandardScaler): مقياس الميزات الإضافية.
        le (LabelEncoder): مشفر العواطف.
        positive_emotions (List[str], optional): قائمة بالعواطف الإيجابية للاحتفاظ بها.
        save_path (str, optional): إذا تم توفيره، سيتم حفظ الداتا فريم النهائي في هذا المسار كملف CSV.

    Returns:
        pd.DataFrame: داتا فريم يحتوي على النصوص المصنفة ذات العواطف الإيجابية.
    """

    if text_column not in df.columns:
        raise ValueError(f"❌ العمود '{text_column}' غير موجود في الداتا فريم.")

    df['clean_text'] = df[text_column].apply(preprocess_text)

    X_text = vectorizer.transform(df['clean_text'])


    predicted = model.predict(X_text)
    df['emotion'] = le.inverse_transform(predicted)

    df_filtered = df[df['emotion'].isin(positive_emotions)]


    if save_path:
        df_filtered.to_csv(save_path, index=False)

    return df_filtered


In [61]:
try:
    df_clean = classify_emotions(
        df=books_db,
        text_column="title",
        model=model,
        vectorizer=vectorizer,
        le=le,
        save_path="classified_books.csv"
    )
except ValueError as e:
    print(str(e))


In [62]:
try:
    df_clean = classify_emotions(
        df=articles_db,
        text_column="title",
        model=model,
        vectorizer=vectorizer,
        le=le,
        save_path="classified_articles.csv"
    )
except ValueError as e:
    print(str(e))


In [63]:
def filter_by_keywords(df: pd.DataFrame, include_keywords=None, exclude_keywords=None) -> pd.DataFrame:
    """
    ترشيح المحتوى بناءً على كلمات مفتاحية.
    
    Args:
        df (pd.DataFrame): جدول المحتوى.
        include_keywords (list): كلمات يجب أن يحتويها النص.
        exclude_keywords (list): كلمات يجب ألا يحتويها النص.
    
    Returns:
        pd.DataFrame: محتوى بعد التصفية.
    """
    if include_keywords:
        pattern = '|'.join(include_keywords)
        df = df[df['title'].str.contains(pattern, case=False, na=False)]
    
    if exclude_keywords:
        pattern = '|'.join(exclude_keywords)
        df = df[~df['title'].str.contains(pattern, case=False, na=False)]

    return df

In [64]:
mood_map = {
    'sadness': ['joy', 'surprise'],
    'anger': ['love', 'joy'],
    'fear': ['love', 'joy'],
    'joy': ['joy','surprise', 'love'],
    'surprise': ['love', 'joy'],
    'love': ['joy', 'love', 'surprise']
    }

In [65]:

def recommend_content_filtered(
    user_text: str,
    model,
    vectorizer,
    label_encoder,
    rec_db: pd.DataFrame,
    include_keywords=None,
    exclude_keywords=None
) -> pd.DataFrame:
    """
    #توصية بمحتوى مع فلترة اختيارية بناءً على الكلمات المفتاحية.
    """
    # تنظيف أسماء الأعمدة من المسافات الزائدة
    rec_db.columns = rec_db.columns.str.strip()

    # التأكد من وجود الأعمدة المطلوبة
    if 'predicted_emotion' not in rec_db.columns or 'original_title' not in rec_db.columns:
        raise KeyError("⚠️ تأكد أن الداتا تحتوي على الأعمدة: 'original_title' و 'predicted_emotion'.")

    # التنبؤ بالمشاعر للنص
    user_emotion = predict_emotion(user_text, model, vectorizer, label_encoder)
    print(f"🔍 Detected Emotion: {user_emotion}")

    # تحديد المشاعر المستهدفة بناءً على mood_map
    target_emotions = mood_map.get(user_emotion, ['joy', 'calm', 'confidence'])

    # ترشيح المحتوى بناءً على المشاعر
    recommended = rec_db[rec_db['predicted_emotion'].isin(target_emotions)]

    # فلترة المحتوى بناءً على الكلمات
    filtered = filter_by_keywords(recommended, include_keywords, exclude_keywords)

    # في حالة عدم وجود نتائج بعد الفلترة
    if filtered.empty:
        print("⚠️ لا يوجد محتوى يلبي معايير الفلترة. عرض نتائج بدون فلترة.")
        return recommended[['original_title', 'authors','predicted_emotion']].sample(min(3, len(recommended)))

    return filtered[['original_title', 'authors', 'predicted_emotion']].sample(min(3, len(filtered)))



In [66]:
def recommend_books(emotion: str, top_n: int = 5) -> List[Dict]:
    df = pd.read_csv("classified_books.csv")  

    if "emotion" not in df.columns:
        df["emotion"] = df["title"].apply(predict_emotion)

    target_emotions = mood_map.get(emotion, ["joy", "love", "surprise"])

    recommended = df[df["emotion"].isin(target_emotions)].head(top_n)

    return recommended[["title", "authors", "emotion"]].to_dict(orient="records")


In [67]:
def recommend_articles(emotion: str, top_n: int = 5) -> List[Dict]:
    df = pd.read_csv("classified_articles.csv")

    if "emotion" not in df.columns:
        df["emotion"] = df["title"].apply(predict_emotion)

    target_emotions = mood_map.get(emotion, ["joy", "love", "surprise"])

    recommended = df[df["emotion"].isin(target_emotions)].head(top_n)

    return recommended[["title", "url", "emotion"]].to_dict(orient="records")


In [68]:
def recommend_content(emotion: str, top_n: int = 5) -> Dict[str, List[Dict]]:
    books = recommend_books(emotion)[:top_n]
    articles = recommend_articles(emotion)[:top_n]
    return {
        "books": books,
        "articles": articles
    }


In [69]:
results = recommend_content(emotion="sad")

print("Books:")
for item in results.get('books', []):
    print(f"- Title: {item.get('title')}, Authors: {item.get('authors')}, Emotion: {item.get('emotion')}")

print("\nArticles:")
for item in results.get('articles', []):
    print(f"- Title: {item.get('title')}, Link: {item.get('link')}, Emotion: {item.get('emotion')}")


Books:
- Title: The Hunger Games, Authors: Suzanne Collins, Emotion: joy
- Title: The Lovely Bones, Authors: Alice Sebold, Emotion: love
- Title: Gone Girl, Authors: Gillian Flynn, Emotion: joy
- Title: The Time Traveler's Wife, Authors: Audrey Niffenegger, Emotion: joy
- Title: A Game of Thrones, Authors: George R.R. Martin, Emotion: joy

Articles:
- Title: Don’t Become a Full-Time Content Creator If You Have Low-Risk Tolerance, Link: None, Emotion: joy
- Title: <strong class="markup--strong markup--h3-strong">How My MBA Made Me a Better Fiction Writer</strong>, Link: None, Emotion: joy
- Title: How to Start Your Novel with Momentum to Finish It, Link: None, Emotion: joy
- Title: <strong class="markup--strong markup--h3-strong">Using Propensity-Score Matching to Build Leading Indicators</strong>, Link: None, Emotion: joy
- Title: Sparkles aren’t good UX✨, Link: None, Emotion: joy


In [70]:
results = recommend_books(emotion="sad")
for book in results:
    print(book)

{'title': 'The Hunger Games', 'authors': 'Suzanne Collins', 'emotion': 'joy'}
{'title': 'The Lovely Bones', 'authors': 'Alice Sebold', 'emotion': 'love'}
{'title': 'Gone Girl', 'authors': 'Gillian Flynn', 'emotion': 'joy'}
{'title': "The Time Traveler's Wife", 'authors': 'Audrey Niffenegger', 'emotion': 'joy'}
{'title': 'A Game of Thrones', 'authors': 'George R.R. Martin', 'emotion': 'joy'}


In [71]:
results = recommend_articles(emotion="sad")
for article in results:
    print(article)


{'title': 'Don’t Become a Full-Time Content Creator If You Have Low-Risk Tolerance', 'url': 'https://medium.com/swlh/dont-become-a-full-time-content-creator-if-you-have-low-risk-tolerance-13fa2f77791a', 'emotion': 'joy'}
{'title': '<strong class="markup--strong markup--h3-strong">How My MBA Made Me a Better Fiction\xa0Writer</strong>', 'url': 'https://writingcooperative.com/how-my-mba-made-me-a-better-fiction-writer-d222bc61a5b0', 'emotion': 'joy'}
{'title': 'How to Start Your Novel with Momentum to Finish\xa0It', 'url': 'https://writingcooperative.com/how-to-start-your-novel-with-momentum-to-finish-it-8d001f4908c5', 'emotion': 'joy'}
{'title': '<strong class="markup--strong markup--h3-strong">Using Propensity-Score Matching to Build Leading Indicators</strong>', 'url': 'https://towardsdatascience.com/using-propensity-score-matching-to-build-leading-indicators-3e656dccbaf9', 'emotion': 'joy'}
{'title': 'Sparkles aren’t good\xa0UX✨', 'url': 'https://uxdesign.cc/sparkles-arent-good-ux-4b

In [74]:
from collections import Counter
import datetime

def generate_daily_emotion_report(posts, model, vectorizer, label_encoder):
    # تاريخ اليوم
    today = datetime.date.today().strftime("%d/%m/%Y")
    
    # تحويل النصوص إلى تمثيل عددي
    X = vectorizer.transform(posts)
    
    # توقعات النموذج
    predictions = model.predict(X)
    
    # تحويل التوقعات إلى labels مفهومة
    labels = label_encoder.inverse_transform(predictions)
    
    # حساب عدد كل فئة
    counter = Counter(labels)
    
    total_posts = len(posts)
    positive_posts = [post for post, label in zip(posts, labels) if label == 'Positive']
    negative_posts = [post for post, label in zip(posts, labels) if label == 'Negative']
    neutral_posts  = [post for post, label in zip(posts, labels) if label == 'Neutral']
    
    print(f"\n📅 التاريخ: {today}")
    print(f"✅ تم تحليل {total_posts} منشور اليوم.\n")
    
    print("📊 **نتائج التحليل**")
    print(f"- عدد المنشورات الإيجابية: {counter.get('Positive', 0)}")
    print(f"- عدد المنشورات السلبية: {counter.get('Negative', 0)}")
    print(f"- عدد المنشورات المحايدة: {counter.get('Neutral', 0)}\n")
    
    if positive_posts:
        print("🌱 أمثلة على منشورات إيجابية:")
        for p in positive_posts[:2]:  # مثال: أول 2
            print(f"  - {p}")
    
    if negative_posts:
        print("\n⚠️ أمثلة على منشورات سلبية:")
        for p in negative_posts[:2]:
            print(f"  - {p}")
    
    if neutral_posts:
        print("\nℹ️ أمثلة على منشورات محايدة:")
        for p in neutral_posts[:2]:
            print(f"  - {p}")
    
    print("\n✅ الخطة لليوم القادم:")
    print("- تحليل بيانات إضافية.")
    print("- تحسين دقة النموذج.")
    print("- عرض النتائج بشكل رسومي مبسط.\n")

# ---------------------
# مثال الاستخدام:
daily_posts = [
    "I feel really anxious about school tomorrow.",
    "Had fun with my friends today!",
    "Why is everything going wrong in my life?",
    "Watched a peaceful documentary about nature.",
    "I feel so loved and appreciated."
]

generate_daily_emotion_report(
    posts=daily_posts,
    model=random_forest_model,
    vectorizer=tfidf,
    label_encoder=le
)



📅 التاريخ: 05/07/2025
✅ تم تحليل 5 منشور اليوم.

📊 **نتائج التحليل**
- عدد المنشورات الإيجابية: 0
- عدد المنشورات السلبية: 0
- عدد المنشورات المحايدة: 0


✅ الخطة لليوم القادم:
- تحليل بيانات إضافية.
- تحسين دقة النموذج.
- عرض النتائج بشكل رسومي مبسط.

