In [None]:
# Mount Google Drive at /content/drive. `google.colab` is the Colab runtime's
# helper package, so this cell is specific to that environment.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Explicit %cd magic: the bare `cd` form only works while IPython automagic is
# enabled and no Python variable named `cd` exists in the namespace.
%cd /content/drive/MyDrive/Colab Notebooks

/content/drive/MyDrive/Colab Notebooks


In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Load the viewing log into a DataFrame. The path is relative, so it is
# resolved against the current working directory.
viewer_df = pd.read_csv('mock_viewer_data.csv')
# Preview the first rows to check the columns that were read.
viewer_df.head()

Unnamed: 0,Username,Country,Title,Date Watched,Percentage Watched,Rating
0,nguyenjames,Namibia,To All The Boys: Always And Forever,2022-07-26,20.156132,R
1,ericramirez,Azerbaijan,Iceman,2022-09-29,76.724487,TV-Y7
2,ccole,Saint Vincent and the Grenadines,Almost Love,2022-08-11,1.713326,TV-MA
3,russellbrandon,Kyrgyz Republic,She-Ra and the Princesses of Power,2023-03-12,15.677891,R
4,courtneydavis,Guam,Uncovered,2023-03-14,2.444522,TV-MA


In [None]:
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules

# Display the whole frame; pandas abbreviates it to the first and last rows.
viewer_df


Unnamed: 0,Username,Country,Title,Date Watched,Percentage Watched,Rating
0,nguyenjames,Namibia,To All The Boys: Always And Forever,2022-07-26,20.156132,R
1,ericramirez,Azerbaijan,Iceman,2022-09-29,76.724487,TV-Y7
2,ccole,Saint Vincent and the Grenadines,Almost Love,2022-08-11,1.713326,TV-MA
3,russellbrandon,Kyrgyz Republic,She-Ra and the Princesses of Power,2023-03-12,15.677891,R
4,courtneydavis,Guam,Uncovered,2023-03-14,2.444522,TV-MA
...,...,...,...,...,...,...
995,charlescoleman,Uganda,Goldie & Bear,2023-02-26,53.538089,TV-14
996,kathrynbell,Uganda,Adore,2023-03-31,61.448907,TV-MA
997,rjackson,United States Minor Outlying Islands,Growing Up Wild,2023-05-10,11.375822,TV-MA
998,hardingaaron,Saint Kitts and Nevis,Prem Ratan Dhan Payo,2022-11-19,87.404909,R


In [None]:
def create_apriori_datastructure(dataframe, username_col='Username', item_col='Title'):
    """Turn a long-format log into a one-hot user x item matrix.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        One row per (user, item) event. Repeated pairs are allowed.
    username_col : str, default 'Username'
        Column whose values become the row index of the result.
    item_col : str, default 'Title'
        Column whose values become the columns of the result.

    Returns
    -------
    pandas.DataFrame
        Integer matrix indexed by user with one column per item; a cell is 1
        when the user has at least one row for that item, otherwise 0.
    """
    # Count rows per (user, item) pair, then spread the items across columns.
    grouped = dataframe.groupby([username_col, item_col], as_index=False).size()
    counts = pd.pivot(data=grouped, index=username_col, columns=item_col, values='size')
    # Pairs that never occur are NaN after the pivot and compare False against
    # 0, so a single vectorised comparison yields the 0/1 flags without the
    # deprecated, element-by-element `applymap`.
    apriori_datastructure = counts.gt(0).astype(int)
    return apriori_datastructure

### `create_apriori_datastructure` transforms a long-format DataFrame (one row per username–item pair) into the matrix the Apriori algorithm expects: one row per username, one column per item, and a 1 where the user has that item or a 0 where they do not.

In [None]:
# Build the 0/1 user x title matrix from the viewing log.
apriori_df = create_apriori_datastructure(viewer_df)
# Plain-text dump of the matrix; wide frames are wrapped into column chunks.
print(apriori_df)

Title         #Alive  100 Things to do Before High School  100% Halal  \
Username                                                                
aaronwalsh         0                                    0           0   
abigailpitts       0                                    0           0   
adam76             0                                    0           0   
adamjones          0                                    0           0   
adamrobbins        0                                    0           0   
...              ...                                  ...         ...   
zmorrow            0                                    0           0   
zrowe              0                                    0           0   
ztownsend          0                                    0           0   
zvelazquez         0                                    0           0   
zwarren            0                                    0           0   

Title         12 ROUND GUN  15-Aug  2015 Dream Con

In [None]:
def get_rules(apriori_df, min_support=0.001):
    """Mine association rules from a one-hot transaction matrix.

    Parameters
    ----------
    apriori_df : pandas.DataFrame
        One row per transaction (here: per user) and one column per item,
        with 0/1 or boolean cells marking which items are present.
    min_support : float, default 0.001
        Minimum fraction of transactions an itemset must appear in to count
        as frequent. The same value is used as the rule threshold below.

    Returns
    -------
    pandas.DataFrame
        The rule table returned by mlxtend's ``association_rules``.
    """
    # Imported locally under private aliases: a later cell in this notebook
    # rebinds the global name `association_rules` to a DataFrame of results,
    # after which a global lookup here would no longer find the function.
    from mlxtend.frequent_patterns import apriori as _apriori
    from mlxtend.frequent_patterns import association_rules as _association_rules

    # Step 1: enumerate every item combination whose support reaches
    # `min_support`, keeping item names (not column positions) in the output.
    frequent_itemsets = _apriori(
        apriori_df, min_support=min_support, use_colnames=True)
    # Step 2: derive rules from those itemsets, filtered on the support metric.
    # The threshold equals the one already applied in step 1, so in effect it
    # does not remove anything further.
    rules = _association_rules(
        frequent_itemsets, metric="support", min_threshold=min_support)
    return rules

In [None]:
viewer_df_rules = get_rules(apriori_df)
# sort_values returns a new frame instead of sorting in place, so the sorted
# result is what has to be printed. `viewer_df_rules` itself keeps its original
# row order for the cells that use it afterwards.
print(viewer_df_rules.sort_values(by='lift', ascending=False))

                                      antecedents  \
0                                  (A Separation)   
1              (Living in Bondage: Breaking Free)   
2            (She-Ra and the Princesses of Power)   
3   (A Young Doctor's Notebook and Other Stories)   
4               (Hank: Five Years from the Brink)   
..                                            ...   
91                               (Justice, LA 92)   
92                            (The Stolen, LA 92)   
93                                      (Justice)   
94                                   (The Stolen)   
95                                        (LA 92)   

                                      consequents  antecedent support  \
0              (Living in Bondage: Breaking Free)            0.001046   
1                                  (A Separation)            0.001046   
2   (A Young Doctor's Notebook and Other Stories)            0.001046   
3            (She-Ra and the Princesses of Power)            0.001046  

In [None]:
def recommend_products(rules_df, product_id, rec_count=7):
    """Recommend items that co-occur with ``product_id`` according to the rules.

    Parameters
    ----------
    rules_df : pandas.DataFrame
        Rule table with ``antecedents`` and ``consequents`` columns holding
        collections of items, plus a numeric ``lift`` column.
    product_id : hashable
        The item the recommendations are based on.
    rec_count : int, default 7
        Maximum number of recommendations to return.

    Returns
    -------
    list
        Unique consequent items of every rule whose antecedents contain
        ``product_id``, strongest lift first, never including ``product_id``
        itself, truncated to ``rec_count`` entries.
    """
    sorted_rules = rules_df.sort_values('lift', ascending=False)

    recommended_products = []
    seen = {product_id}  # pre-seeded so the queried item is never recommended

    # Walk antecedents and consequents of the *same* row together. Looking the
    # row up again with `.iloc[label]` is wrong after sorting, because the
    # index labels no longer match row positions.
    for antecedents, consequents in zip(sorted_rules["antecedents"],
                                        sorted_rules["consequents"]):
        if product_id not in antecedents:
            continue
        # Items inside one consequent set share the same lift; order them by
        # their string form so the result is deterministic.
        for item in sorted(consequents, key=str):
            if item not in seen:
                seen.add(item)
                recommended_products.append(item)

    # De-duplicating with a list + set keeps the lift ordering, so the slice
    # really returns the top `rec_count` recommendations.
    return recommended_products[:rec_count]


In [None]:
def get_golden_shot(target_id,rules):
    """Print the recommendations for ``target_id`` derived from ``rules``.

    Looks the recommendations up with ``recommend_products`` (default
    ``rec_count``) and writes them to stdout, followed by a
    ``Product Names: `` label line. Nothing is returned.
    """
    picks = recommend_products(rules_df=rules, product_id=target_id)
    print(f'Recommended Products: {picks}\nProduct Names: ')

In [None]:
# Example lookup: print recommendations for the title "The Stolen" using the
# rules mined from the full user x title matrix.
get_golden_shot("The Stolen", viewer_df_rules)

Recommended Products: ['Paharganj', 'Ha Unlimited', 'The Rise of Phoenixes']
Product Names: 


In [None]:
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, association_rules
import pandas as pd

# Collect, per username, the list of titles that user has rows for
# (a title appears once per row, so repeats are kept).
shows_by_user = viewer_df.groupby('Username')['Title'].apply(list)

# Number of title entries in each user's list
user_show_count = shows_by_user.apply(len)

# Keep the 100 users with the longest lists
top_users = user_show_count.nlargest(100).index
shows_by_top_users = shows_by_user.loc[top_users]

# One transaction (list of titles) per selected user
transactions = shows_by_top_users.tolist()

# One-hot encode the transactions: one row per user, one column per title
te = TransactionEncoder()
te_array = te.fit_transform(transactions)
df_transformed = pd.DataFrame(te_array, columns=te.columns_)

# Value passed to apriori's `max_len` below.
# NOTE(review): per the mlxtend docs `max_len` caps the number of items inside
# a single itemset, not the number of itemsets returned, so the variable name
# is misleading.
max_itemsets = 1000

# Find frequent itemsets. With at most 100 transactions the smallest non-zero
# support is 1/100, so a 0.0001 threshold keeps every itemset that occurs at
# least once.
frequent_itemsets = apriori(df_transformed, min_support=0.0001, use_colnames=True, max_len=max_itemsets)

# Print frequent itemsets
print(frequent_itemsets)




     support                                           itemsets
0       0.01                                            (5Gang)
1       0.01                       (A Russell Peters Christmas)
2       0.01                                     (A Separation)
3       0.01      (A Young Doctor's Notebook and Other Stories)
4       0.01                                              (AMO)
..       ...                                                ...
184     0.01  (Urzila Carlson: Overqualified Loser, The Amer...
185     0.01        (The Indian in the Cupboard, Winter's Bone)
186     0.01                           (Trio and a Bed, Umrika)
187     0.01        (Whindersson Nunes: Adult, When Heroes Fly)
188     0.01                       (Justice, The Stolen, LA 92)

[189 rows x 2 columns]


In [None]:
# Import the rule builder under an alias. The result below is stored in a
# variable called `association_rules` (later cells read it by that name), which
# would otherwise overwrite the imported function and make this cell fail with
# "'DataFrame' object is not callable" the second time it is run.
from mlxtend.frequent_patterns import association_rules as build_association_rules

# Work on at most the first 1000 frequent itemsets
frequent_itemsets = frequent_itemsets.head(1000)

# Extract association rules, keeping those whose lift is at least 0.01
association_rules = build_association_rules(frequent_itemsets, metric="lift", min_threshold=0.01)

# Sort the rules based on lift value, strongest first
association_rules = association_rules.sort_values(by='lift', ascending=False)

# Show the most frequently watched shows together
most_watched = association_rules[['antecedents', 'consequents', 'support', 'lift']]

print(most_watched)

                              antecedents  \
0                          (A Separation)   
1      (Living in Bondage: Breaking Free)   
70                                (Leila)   
69                             (Reaction)   
68                    (La Esclava Blanca)   
..                                    ...   
29     (Melodies of Life - Born This Way)   
28                        (Chillar Party)   
27  (Chhota Bheem aur Krishna vs Zimbara)   
26                (The Rise of Phoenixes)   
95                                (LA 92)   

                              consequents  support   lift  
0      (Living in Bondage: Breaking Free)     0.01  100.0  
1                          (A Separation)     0.01  100.0  
70                       (Resort to Love)     0.01  100.0  
69                    (La Esclava Blanca)     0.01  100.0  
68                             (Reaction)     0.01  100.0  
..                                    ...      ...    ...  
29                        (Chillar Part

In [None]:
def recommend_products(rules_df, product_id, rec_count=7):
    """Return up to ``rec_count`` items associated with ``product_id``.

    Parameters
    ----------
    rules_df : pandas.DataFrame
        Rule table with ``antecedents`` and ``consequents`` columns holding
        collections of items, plus a numeric ``lift`` column.
    product_id : hashable
        The item already "in the cart".
    rec_count : int, default 7
        Maximum number of recommendations to return.

    Returns
    -------
    list
        Unique consequent items of the rules whose antecedents contain
        ``product_id``, ordered by descending lift and excluding
        ``product_id`` itself.
    """
    # Strongest rules first, so the recommendations come out in lift order.
    sorted_rules = rules_df.sort_values('lift', ascending=False)

    recommended_products = []
    seen = {product_id}  # never recommend the item that is already in the cart

    # Pair each antecedent set with the consequents of the same row. Indexing
    # back into the sorted frame with `.iloc[label]` picks a different rule,
    # because sorting permutes positions while labels stay attached to rows.
    for antecedents, consequents in zip(sorted_rules["antecedents"],
                                        sorted_rules["consequents"]):
        if product_id not in antecedents:
            continue
        # Deterministic order for items that come from the same rule.
        for item in sorted(consequents, key=str):
            if item not in seen:
                seen.add(item)
                recommended_products.append(item)

    # Order-preserving de-duplication (instead of a set) means the slice keeps
    # the highest-lift recommendations.
    return recommended_products[:rec_count]

### The `recommend_products()` function uses association rules to recommend products related to the one in the cart. It sorts the rules by lift, collects the consequents of every rule that has the cart product among its antecedents, removes duplicates, and returns at most `rec_count` recommended products.


In [None]:
# `association_rules` here is the DataFrame of rules built in the earlier cell,
# not the mlxtend function of the same name. Ask for at most 4 recommendations.
recommended_products = recommend_products(association_rules, 'The Rise of Phoenixes', rec_count=4)
print(recommended_products)

['Caregiver']


In [None]:
# Rich display of the lift-sorted rule summary (antecedents, consequents,
# support, lift).
most_watched

Unnamed: 0,antecedents,consequents,support,lift
0,(A Separation),(Living in Bondage: Breaking Free),0.01,100.0
1,(Living in Bondage: Breaking Free),(A Separation),0.01,100.0
70,(Leila),(Resort to Love),0.01,100.0
69,(Reaction),(La Esclava Blanca),0.01,100.0
68,(La Esclava Blanca),(Reaction),0.01,100.0
...,...,...,...,...
29,(Melodies of Life - Born This Way),(Chillar Party),0.01,100.0
28,(Chillar Party),(Melodies of Life - Born This Way),0.01,100.0
27,(Chhota Bheem aur Krishna vs Zimbara),(The Rise of Phoenixes),0.01,100.0
26,(The Rise of Phoenixes),(Chhota Bheem aur Krishna vs Zimbara),0.01,100.0
