In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler,PolynomialFeatures
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report,mean_squared_error, r2_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import LinearRegression,Ridge
from sklearn.ensemble import RandomForestRegressor

In [None]:
# Read the semicolon-delimited CSV and recode the post type as an integer.
# Any 'Type' value absent from the mapping becomes NaN after .map().
type_codes = {'Photo': 1, 'Status': 2, 'Link': 3, "Video": 4}
df = pd.read_csv('dataset_Facebook.csv', sep=';').assign(
    Type=lambda frame: frame['Type'].map(type_codes)
)
# Modelling frame: only rows with no missing value in any column.
df_ready = df.dropna()

# Predictor columns used by the cells that classify the "Paid" column.
selected_features_classificationforpaid = [
    "Page total likes",
    "Post Hour",
    "Lifetime Post Total Reach",
    "Lifetime Post Total Impressions",
    "Lifetime Engaged Users",
    "Lifetime Post Consumers",
    "Lifetime Post Consumptions",
    "Lifetime Post Impressions by people who have liked your Page",
    "Lifetime Post reach by people who like your Page",
    "Lifetime People who have liked your Page and engaged with your post",
    "comment",
    "like",
    "share",
    "Total Interactions",
]

# Predictor columns used by the cells that regress "Total Interactions".
selected_features_regressionforinteactions = [
    "Lifetime Post Total Reach",
    "Lifetime Post Total Impressions",
    "Lifetime Post Consumers",
    "Lifetime Engaged Users",
    "Lifetime Post Consumptions",
    "Lifetime Post Impressions by people who have liked your Page",
    "Lifetime Post reach by people who like your Page",
    "Lifetime People who have liked your Page and engaged with your post",
    "Paid",
]

In [None]:
# Random-forest classification of "Paid": exhaustive sweep over max_depth x n_estimators.
X = df_ready[selected_features_classificationforpaid]
y = df_ready["Paid"]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
n_estimators_list = [10, 50, 100, 200]
max_depth_list = [2, 4, 6, 8]
results = {}  # max_depth -> accuracies for that depth, in n_estimators_list order
# Start below any attainable accuracy so the first fitted model always becomes
# the incumbent and `best_model` is guaranteed to be bound after the loop.
best_scoreRandom = float("-inf")
best_params = {}
best_model = None
rf_results = []
for depth in max_depth_list:
    # Fresh list per depth: sharing one list across depths made every
    # results[depth] entry point at the same cumulative score list.
    acc_scores = []
    for n_estimators in n_estimators_list:
        clf = RandomForestClassifier(n_estimators=n_estimators, max_depth=depth, random_state=42)
        clf.fit(X_train, y_train)
        y_pred = clf.predict(X_test)
        acc = accuracy_score(y_test, y_pred)
        acc_scores.append(acc)
        rf_results.append({'N_estimators': n_estimators, 'Max_depth': depth, 'Accuracy': acc})
        # Strict '>' keeps the earliest configuration on ties.
        if acc > best_scoreRandom:
            best_scoreRandom = acc
            best_params = {'N_estimators': n_estimators, 'Max_depth': depth}
            best_model = clf
    results[depth] = acc_scores
# NOTE(review): the winning configuration is chosen on the same test split it is
# reported on, so the figures below are optimistic; a validation split or
# cross-validation would give an unbiased estimate.
y_pred_best = best_model.predict(X_test)
print(f"Best Parameters: N_estimators = {best_params['N_estimators']}, Max_depth = {best_params['Max_depth']}")
print("Classification Report: \n", classification_report(y_test,y_pred_best))
rf_df = pd.DataFrame(rf_results)

Best Parameters: N_estimators = 200, Max_depth = 2
Classification Report: 
               precision    recall  f1-score   support

         0.0       0.81      1.00      0.90        79
         1.0       1.00      0.10      0.18        20

    accuracy                           0.82        99
   macro avg       0.91      0.55      0.54        99
weighted avg       0.85      0.82      0.75        99



In [None]:
# K-nearest-neighbours classification of "Paid": sweep k = 1..20 on standardized features.
X = df_ready[selected_features_classificationforpaid]
y = df_ready["Paid"]
# Split BEFORE scaling so the scaler's mean/std come from the training rows only;
# fitting it on the full frame leaks test-set statistics into training.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)
# Full feature matrix expressed in the training-set scale (name kept for later use).
X_scaled = scaler.transform(X)
k_values = list(range(1, 21))
accuracies = []
knn_results = []
for k in k_values:
    knn = KNeighborsClassifier(n_neighbors=k)
    knn.fit(X_train, y_train)
    y_pred = knn.predict(X_test)
    acc = accuracy_score(y_test, y_pred)
    accuracies.append(acc)
    knn_results.append({ 'K': k, 'Accuracy': acc})
# list.index returns the first match, so ties resolve to the smallest k.
best_k = k_values[accuracies.index(max(accuracies))]
# Refit at the selected k to obtain the predictions used in the report.
knn_final = KNeighborsClassifier(n_neighbors=best_k)
knn_final.fit(X_train, y_train)
y_pred_final = knn_final.predict(X_test)
print("Best k:", best_k)
print("Classification Report: \n", classification_report(y_test,y_pred_final))
bestknn =accuracy_score(y_test, y_pred_final)
knn_df = pd.DataFrame(knn_results)

Best k: 19
Classification Report: 
               precision    recall  f1-score   support

         0.0       0.84      0.96      0.89        79
         1.0       0.62      0.25      0.36        20

    accuracy                           0.82        99
   macro avg       0.73      0.61      0.63        99
weighted avg       0.79      0.82      0.79        99



In [None]:
# Gaussian Naive Bayes on the unscaled "Paid" features, same 80/20 split seed as the other classifiers.
X = df_ready[selected_features_classificationforpaid]
y = df_ready["Paid"]
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)
# fit() returns the estimator itself, so construction and training can be chained.
model = GaussianNB().fit(X_train, y_train)
predictions = model.predict(X_test)
nb_report = classification_report(y_test, predictions)
print("Classification Report: \n", nb_report)
# Test accuracy, reused by the classifier comparison table.
Naiveacc = accuracy_score(y_test, predictions)

Classification Report: 
               precision    recall  f1-score   support

         0.0       0.83      0.94      0.88        79
         1.0       0.50      0.25      0.33        20

    accuracy                           0.80        99
   macro avg       0.67      0.59      0.61        99
weighted avg       0.76      0.80      0.77        99



In [None]:
# Random-forest regression of "Total Interactions": sweep over max_depth x n_estimators.
X = df_ready[selected_features_regressionforinteactions]
y = df_ready["Total Interactions"]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)
n_estimators_list = [10, 50, 100, 200]
max_depth_list = [2, 4, 6, 8]
results = {}  # max_depth -> R2 scores for that depth, in n_estimators_list order
# R2 can be negative, so a starting value of 0 could leave no winner selected.
# Start at -inf and reset `best_model` explicitly so a stale model left behind
# by an earlier cell can never be scored here by accident.
best_scoreRandomreg = float("-inf")
best_params = {}
best_model = None
rfreg_results = []
for depth in max_depth_list:
    # Fresh list per depth: sharing one list across depths made every
    # results[depth] entry point at the same cumulative score list.
    acc_scores = []
    for n_estimators in n_estimators_list:
        # `clf` / `acc` keep their original names; here they hold a regressor and its R2.
        clf = RandomForestRegressor(n_estimators=n_estimators, max_depth=depth, random_state=0)
        clf.fit(X_train, y_train)
        y_pred = clf.predict(X_test)
        acc = r2_score(y_test, y_pred)
        acc_scores.append(acc)
        rfreg_results.append({'N_estimators': n_estimators, 'Max_depth': depth, 'R2_Score': acc})
        # Strict '>' keeps the earliest configuration on ties.
        if acc > best_scoreRandomreg:
            best_scoreRandomreg = acc
            best_params = {'N_estimators': n_estimators, 'Max_depth': depth}
            best_model = clf
    results[depth] = acc_scores
# NOTE(review): the configuration is selected on the test split it is reported on,
# so the R2 / MSE below are optimistic estimates.
y_pred_best = best_model.predict(X_test)
print(f"Best Parameters: N_estimators = {best_params['N_estimators']}, Max_depth = {best_params['Max_depth']}")
print("R2 Score: \n", r2_score(y_test, y_pred_best))
print(f"MSE: {mean_squared_error(y_test, y_pred_best)}")
rfreg_df = pd.DataFrame(rfreg_results)

Best Parameters: N_estimators = 50, Max_depth = 6
R2 Score: 
 0.44748454679069316
MSE: 223129.5028493962


In [None]:
# Ordinary least squares on polynomial expansions of the standardized features.
# X and y are whatever the preceding cell left in scope.
degrees = [1, 2]
poly_results = []
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=0
)
# The scaler is fitted on the training rows only, then applied to the test rows.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)
for degree in degrees:
    poly_features = PolynomialFeatures(degree=degree)
    x_poly_train = poly_features.fit_transform(X_train_scaled)
    x_poly_test = poly_features.transform(X_test_scaled)
    # fit() returns the estimator, so it can be chained onto the constructor.
    model = LinearRegression().fit(x_poly_train, y_train)
    y_pred = model.predict(x_poly_test)
    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)
    poly_results.append({'Degree': degree, 'MSE': mse, 'R2': r2})
    acc = r2
    print('Mean squared error: %.2f' % mse)
    print('Coefficient of determination: %.2f' % r2)
poly_df = pd.DataFrame(poly_results)

Mean squared error: 21821.54
Coefficient of determination: 0.95
Mean squared error: 770819.67
Coefficient of determination: -0.91


In [None]:
# Ridge regression on polynomial expansions of the standardized features,
# swept over every (degree, alpha) pair. X, y and `degrees` come from earlier cells.
alphas = [100, 10, 1, 0.1, 0.001]
ridge_results = []
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)
# The scaler is fitted on the training rows only, then applied to the test rows.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)
for degree in degrees:
    # The expansion depends only on `degree`, so build it once per degree
    # rather than once per alpha.
    poly_features = PolynomialFeatures(degree)
    x_poly_train = poly_features.fit_transform(X_train_scaled)
    x_poly_test = poly_features.transform(X_test_scaled)
    for alpha in alphas:
        model = Ridge(alpha=alpha)
        model.fit(x_poly_train, y_train)
        y_pred = model.predict(x_poly_test)
        # Compute each metric once and reuse it for the record and the printout.
        mse = mean_squared_error(y_test, y_pred)
        r2 = r2_score(y_test, y_pred)
        ridge_results.append({'Degree': degree ,'Alpha': alpha,'MSE': mse,'R2': r2})
        print('Mean squared error: %.2f' % mse)
        print('Coefficient of determination: %.2f' % r2)
ridge_df = pd.DataFrame(ridge_results)

Mean squared error: 258485.61
Coefficient of determination: 0.36
Mean squared error: 122913.63
Coefficient of determination: 0.70
Mean squared error: 32258.59
Coefficient of determination: 0.92
Mean squared error: 22712.03
Coefficient of determination: 0.94
Mean squared error: 21830.21
Coefficient of determination: 0.95
Mean squared error: 136166.09
Coefficient of determination: 0.66
Mean squared error: 271250.29
Coefficient of determination: 0.33
Mean squared error: 107429.89
Coefficient of determination: 0.73
Mean squared error: 121942.23
Coefficient of determination: 0.70
Mean squared error: 644906.12
Coefficient of determination: -0.60


In [None]:
# Comparison table: one row per classifier with the score recorded in its own cell.
classifier_scores = (
    ("Random Forest", best_scoreRandom),
    ("KNN", bestknn),
    ("Naive Bayes", Naiveacc),
)
resultsclass_dict = [
    {"Classifier": label, "Score": score}
    for label, score in classifier_scores
]
resultsclass_df = pd.DataFrame(resultsclass_dict)


In [None]:
# Comparison table: best R2 per regression family.
# Ridge and polynomial take the maximum R2 across their recorded runs.
ridge_max_r2 = max(entry['R2'] for entry in ridge_results)
poly_max_r2 = max(entry['R2'] for entry in poly_results)
regression_scores = (
    ("Ridge", ridge_max_r2),
    ("Polynomial", poly_max_r2),
    ("Random forest", best_scoreRandomreg),
)
resultsreg_dict = [
    {"Regression": label, "Score": score}
    for label, score in regression_scores
]
resultsreg_df = pd.DataFrame(resultsreg_dict)
