# Import Libraries

In [1]:
import pandas as pd
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, association_rules

# Read in Dataset

In [2]:
# Load the dataset
file_path = '/Users/deadrienhill/codeup-data-science/oneil-clustering-exercises/D212_Churn_Clean.csv'  
df = pd.read_csv(file_path)
# Display first few rows
df.head()

Unnamed: 0,CaseOrder,Customer_id,Interaction,UID,City,State,County,Zip,Lat,Lng,...,MonthlyCharge,Bandwidth_GB_Year,Item1,Item2,Item3,Item4,Item5,Item6,Item7,Item8
0,1,K409198,aa90260b-4141-4a24-8e36-b04ce1f4f77b,e885b299883d4f9fb18e39c75155d990,Point Baker,AK,Prince of Wales-Hyder,99927,56.251,-133.37571,...,172.455519,904.53611,5,5,5,3,4,4,3,4
1,2,S120509,fb76459f-c047-4a9d-8af9-e0f7d4ac2524,f2de8bef964785f41a2959829830fb8a,West Branch,MI,Ogemaw,48661,44.32893,-84.2408,...,242.632554,800.982766,3,4,3,3,4,3,4,4
2,3,K191035,344d114c-3736-4be5-98f7-c72c281e2d35,f1784cfa9f6d92ae816197eb175d3c71,Yamhill,OR,Yamhill,97148,45.35589,-123.24657,...,159.947583,2054.706961,4,4,2,4,4,3,3,3
3,4,D90850,abfa2b40-2d43-4994-b15a-989b8c79e311,dc8a365077241bb5cd5ccd305136b05e,Del Mar,CA,San Diego,92014,32.96687,-117.24798,...,119.95684,2164.579412,4,4,4,2,5,4,3,3
4,5,K662701,68a861fd-0d20-4e51-a587-8a90407ee574,aabb64a116e83fdc4befc1fbab1663f9,Needville,TX,Fort Bend,77461,29.38012,-95.80673,...,149.948316,271.493436,4,4,4,3,4,4,4,5


# Prep Service Related Columns

In [3]:
# Service related columns
service_columns = ['InternetService', 'Phone', 'StreamingTV', 'StreamingMovies', 
                   'OnlineSecurity', 'OnlineBackup', 'DeviceProtection', 'TechSupport']
service_df = df[service_columns]

In [4]:
# Convert service columns into transactional format
transactions = service_df.apply(lambda col: col.map(str)).values.tolist()

In [5]:
# One Hot encode with the TransactionEncoder
te = TransactionEncoder()
te_data = te.fit_transform(transactions)
df_transformed = pd.DataFrame(te_data, columns=te.columns_)

# Apply Apriori algorithm

In [6]:
# Apply Apriori algorithm to find frequent itemsets
frequent_itemsets = apriori(df_transformed, min_support=0.05, use_colnames=True)

In [7]:
# Generate association rules
num_itemsets = len(frequent_itemsets)
rules = association_rules(frequent_itemsets, metric="lift", min_threshold=1.0, num_itemsets=num_itemsets)

In [8]:
# Display the top frequent itemsets and association rules
print("Frequent Itemsets:")
print(frequent_itemsets.sort_values(by="support", ascending=False).head(10))

print("\nAssociation Rules:")
print(rules.sort_values(by="lift", ascending=False).head(10))

Frequent Itemsets:
    support                itemsets
3    0.9976                   (Yes)
2    0.9949                    (No)
8    0.9925               (No, Yes)
1    0.6537           (Fiber Optic)
7    0.6521      (Fiber Optic, Yes)
6    0.6510       (No, Fiber Optic)
10   0.6494  (No, Fiber Optic, Yes)
0    0.3463                   (DSL)
5    0.3455              (DSL, Yes)
4    0.3439               (DSL, No)

Association Rules:
          antecedents         consequents  antecedent support  \
2                (No)       (Fiber Optic)              0.9949   
3       (Fiber Optic)                (No)              0.6537   
8                (No)  (Fiber Optic, Yes)              0.9949   
7  (Fiber Optic, Yes)                (No)              0.6521   
6           (No, Yes)       (Fiber Optic)              0.9925   
9       (Fiber Optic)           (No, Yes)              0.6537   
0               (DSL)               (Yes)              0.3463   
1               (Yes)               (DSL)    

In [9]:
# Save to CSV
frequent_itemsets.to_csv("frequent_itemsets.csv", index=False)
rules.to_csv("association_rules.csv", index=False)