In [2]:
import pandas as pd
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, association_rules

# Raw string: the Windows path contains '\s', '\d', '\p' and '\c', which are
# invalid escape sequences in a normal string literal (Python passes them
# through but emits a warning). The resulting path is unchanged.
df = pd.read_csv(r"D:\study\data\project\cleaned.csv")

# One basket per user: the distinct Product_IDs that user bought.
# A single groupby pass replaces the per-user boolean scan of the whole frame;
# sort=False keeps users in order of first appearance, like Series.unique().
# Every group has at least one row, so no empty baskets can be produced.
lst = [
    list(products)
    for products in df.groupby('User_ID', sort=False)['Product_ID'].unique()
]

# One-hot encode the baskets: one row per user, one boolean column per product.
te = TransactionEncoder()
te_data = te.fit(lst).transform(lst)
df_x = pd.DataFrame(te_data, columns=te.columns_)
print(df_x.head())

   P00000142  P00000242  P00000342  P00000442  P00000542  P00000642  \
0       True      False      False      False      False      False   
1      False      False      False      False      False      False   
2      False      False      False      False      False      False   
3      False      False      False      False      False      False   
4      False      False      False      False      False      False   

   P00000742  P00000842  P00000942  P00001042    ...     P0098942  P0099042  \
0      False      False      False      False    ...        False     False   
1      False      False      False      False    ...        False     False   
2      False      False      False      False    ...        False     False   
3      False      False      False      False    ...        False     False   
4      False      False      False      False    ...        False     False   

   P0099142  P0099242  P0099342  P0099442  P0099642  P0099742  P0099842  \
0     False     False  

In [40]:
# Mine frequent itemsets from the one-hot basket matrix, keeping only those
# whose support is at least 0.05. use_colnames=True reports itemsets by
# column label (the product id) rather than by column position.
frequent_items = apriori(
    df_x,
    min_support=0.05,
    use_colnames=True,
)
print(frequent_items)

       support                           itemsets
0     0.191818                        (P00000142)
1     0.062977                        (P00000242)
2     0.086912                        (P00000642)
3     0.083857                        (P00001042)
4     0.097097                        (P00001142)
5     0.059243                        (P00001642)
6     0.076218                        (P00001742)
7     0.123578                        (P00002142)
8     0.055169                        (P00002442)
9     0.090307                        (P00002542)
10    0.140893                        (P00003242)
11    0.161433                        (P00003442)
12    0.085384                        (P00003642)
13    0.123918                        (P00003942)
14    0.100323                        (P00004742)
15    0.162112                        (P00005042)
16    0.084026                        (P00006942)
17    0.059243                        (P00009342)
18    0.059073                        (P00010242)


In [41]:
# Derive association rules from the frequent itemsets, keeping rules whose
# lift is at least 1.
rules = association_rules(frequent_items, metric='lift', min_threshold=1)


def _itemset_label(items):
    """Turn an itemset into a single hashable label.

    A one-item set is returned as that item (unchanged). A multi-item set is
    rendered as its members sorted and joined with ', ' so that no item is
    silently dropped and the label is deterministic.
    """
    if len(items) == 1:
        return next(iter(items))
    return ', '.join(sorted(map(str, items)))


# Replace the frozenset columns with flat labels. Taking only the first
# element of each set would mislabel any rule with several items on one side.
for side in ('antecedents', 'consequents'):
    rules[side] = rules[side].apply(_itemset_label)

# Strongest associations first.
rules = rules.sort_values('lift', ascending=False)
print(rules)

     antecedents consequents  antecedent support  consequent support  \
2287   P00193542   P00120042            0.102869            0.151417   
2286   P00120042   P00193542            0.151417            0.102869   
2607   P00250242   P00248142            0.128841            0.137328   
2606   P00248142   P00250242            0.137328            0.128841   
1161   P00120042   P00057942            0.151417            0.131387   
1160   P00057942   P00120042            0.131387            0.151417   
1394   P00140742   P00073842            0.132575            0.156340   
1395   P00073842   P00140742            0.156340            0.132575   
1515   P00086442   P00323942            0.162281            0.133424   
1514   P00323942   P00086442            0.133424            0.162281   
2150   P00329542   P00114942            0.117977            0.197250   
2151   P00114942   P00329542            0.197250            0.117977   
2458   P00154042   P00270942            0.104397            0.19

In [47]:
# NOTE(review): `py`, `download_plotlyjs` and `iplot` are not referenced in the
# visible cells. `plotly.plotly` is, as far as I know, only importable on
# plotly < 4 (it moved to the separate `chart_studio` package) — confirm the
# pinned plotly version before running on a fresh environment.
import plotly.plotly as py
import plotly.graph_objs as go
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot

# Enable plotly's offline notebook mode. The figure below is written with
# plot(), not iplot(), so this presumably only matters for inline rendering.
init_notebook_mode(connected=True)

import networkx as nx

# Keep only the strong rules (lift >= 2) and turn them into a graph whose
# nodes are the antecedent/consequent labels and whose edges carry the lift.
nx_data = rules[rules.lift >= 2]
GA = nx.from_pandas_edgelist(
    nx_data, source='antecedents', target='consequents', edge_attr='lift')

# NOTE(review): kamada_kawai_layout uses `weight` as an edge distance, so
# high-lift pairs are pushed farther apart — confirm this is intended.
pos = nx.kamada_kawai_layout(GA, weight='lift')
# Alternative layouts (need pygraphviz):
# pos = nx.nx_agraph.graphviz_layout(GA)
# pos = nx.nx_agraph.graphviz_layout(GA, prog='dot')

# Collect edge coordinates in plain lists and build the trace once.
# Appending to trace['x'] inside the loop re-copies and re-validates the whole
# sequence on every iteration. Each edge is a segment (x0 -> x1) followed by
# None, which tells plotly to break the line before the next segment.
edge_x, edge_y = [], []
for src, dst in GA.edges():
    x0, y0 = pos[src]
    x1, y1 = pos[dst]
    edge_x += [x0, x1, None]
    edge_y += [y0, y1, None]

edge_trace = go.Scatter(
    x=edge_x,
    y=edge_y,
    line=dict(width=0.5,color='#888'),
    hoverinfo='none',
    mode='lines')

# One pass over the adjacency view yields, per node, its position, its number
# of neighbours (used as the marker colour) and its hover text.
node_x, node_y, node_degree, node_text = [], [], [], []
for name, neighbours in GA.adjacency():
    x, y = pos[name]
    node_x.append(x)
    node_y.append(y)
    n_links = len(neighbours)
    node_degree.append(n_links)
    node_text.append(str(name)+' - # of connections: '+str(n_links))

node_trace = go.Scatter(
    x=node_x,
    y=node_y,
    text=node_text,
    mode='markers',
    hoverinfo='text',
    marker=dict(
        showscale=True,
        colorscale='YlGnBu',
        reversescale=True,
        color=node_degree,
        size=10,
        colorbar=dict(
            thickness=15,
            title='Node Connections',
            xanchor='left',
            titleside='right'
        ),
        line=dict(width=2)))

# Assemble the figure: edges underneath, nodes on top, axes hidden.
fig = go.Figure(data=[edge_trace, node_trace],
             layout=go.Layout(
                title='<br>association_rules graph',
                titlefont=dict(size=16),
                showlegend=False,
                hovermode='closest',
                margin=dict(b=20,l=5,r=5,t=40),
                annotations=[ dict(
                    text="Python code: <a href='https://plot.ly/ipython-notebooks/network-graphs/'> https://plot.ly/ipython-notebooks/network-graphs/</a>",
                    showarrow=False,
                    xref="paper", yref="paper",
                    x=0.005, y=-0.002 ) ],
                xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
                yaxis=dict(showgrid=False, zeroline=False, showticklabels=False)))
# Write the interactive figure to a standalone HTML file.
plot(fig, filename='association_rules.html')

'file://D:\\program\\python\\Scripts\\association_rules.html'

In [46]:
# Persist both result tables as CSV files, using relative file names.
for frame, target in (
    (frequent_items, 'frequent_items.csv'),
    (rules, 'rules.csv'),
):
    pd.DataFrame(frame).to_csv(target)