In [None]:
import pandas as pd # importing pandas
import os # import os (operating system module) to fetch directory (folder) contents,
from mlxtend.frequent_patterns import apriori, association_rules
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Folder that holds the individual per-stock CSV files.
directory = 'bist30'
# os.listdir() returns the entries in arbitrary order. Sorting makes the listing - and
# therefore which stocks count as "the first N" when the files are merged - reproducible.
files = sorted(os.listdir(directory))
print(files)

In [None]:
# Load the raw CSV of a single stock (AKBNK) as an example of the input format.
df = pd.read_csv(filepath_or_buffer='bist30/AKBNK.IS.csv')
# Display the example frame as the cell output.
df

In [None]:
# Merge the "Close" column of every complete stock file into one dataframe (one column per stock).
colNo = 0  # Position at which the next stock column is inserted into dfAll
# Only files with exactly this many data rows are merged.
# NOTE(review): len(df.index) does not count the CSV header line, so this check demands
# 590 data rows; the original remark said "589 transactions + 1 column headings row" - confirm.
noOfRows = 590
dfAll = pd.DataFrame()  # Will hold the closing prices of all accepted stocks
# User decides how many stocks to process altogether.
# int() is used instead of eval(): eval() would execute any Python expression typed at the prompt.
NoOfStocksToProcess = int(input("How many stocks would you like to process: "))
for filename in files:
    # Stop as soon as enough stocks have been collected (also yields an empty frame for 0).
    if colNo >= NoOfStocksToProcess:
        break
    if not filename.endswith('.csv'):  # skip anything that is not a csv file
        continue
    df = pd.read_csv(os.path.join(directory, filename))
    if len(df.index) != noOfRows:  # skip stocks whose data is incomplete
        continue
    closeColumn = df["Close"]  # Take the data from the Close column
    # Column name = file name up to its first '.', e.g. 'AKBNK.IS.csv' -> 'AKBNK'
    colName = filename.split(".", 1)[0]
    dfAll.insert(colNo, colName, closeColumn)
    colNo += 1
print('Merged dataset:')
dfAll  # See the merged data

In [None]:
# Turn the merged closing prices into daily flags:
#   1 = the price moved in the requested direction on the following day, 0 = it did not.
increaseOrDecrease = input("Would you like price increase (i) or decrease (d) associations?: ")
# Any answer other than "i"/"I" is treated as "decrease".
wantIncrease = increaseOrDecrease.lower() == "i"
# Set unconditionally so the later chart cells can use it even when dfAll has no columns.
increaseOrDecreaseText = "increased" if wantIncrease else "decreased"

# Next row's close minus this row's close. The last row has no successor, so it becomes NaN.
priceChange = dfAll.shift(-1) - dfAll
directionHit = (priceChange > 0) if wantIncrease else (priceChange < 0)
# Build a NEW frame instead of aliasing dfAll: the merged prices stay intact, so this cell
# can be re-run safely. Rows whose change is unknown stay NaN (they are dropped afterwards).
df = directionHit.astype(float).where(priceChange.notna())
df


In [None]:
# Remove every row that has at least one missing value.
df = df.dropna(axis='index', how='any')
df

In [None]:
# Essential statistics used to look for outliers: keep only the mean and std rows of describe().
summary_stats = df.describe()
summary_stats.loc[['mean', 'std']]

In [None]:
#Show charts for outliers if any
# Horizontal bar chart of the column sums of df, one bar per stock, smallest at the bottom.
plt.figure(figsize = (9,6))
dayCounts = df.sum().sort_values(ascending=True)
Values = list(dayCounts)
# Labels must come from the SORTED series: taking them from df.columns (unsorted) would
# attach the sorted bar lengths to the wrong stock names.
column_names = list(dayCounts.index)
plt.style.use('default')
plt.barh(column_names, Values)
plt.title('Number of days the stock prices ' + increaseOrDecreaseText)
plt.xlabel('Number of days ' + increaseOrDecreaseText)
plt.show()

In [None]:
# Horizontal bar chart of the column means of df expressed as percentages, smallest at the bottom.
plt.figure(figsize = (9,6))
pctByStock = (df.mean()*100).sort_values(ascending=True)
Values = list(pctByStock)
# Left as it was (unsorted) because the next chart cell reads column_names.
column_names = list(df.columns.values)
# Bar labels are taken from the sorted series so that each bar carries its own stock name.
plt.barh(list(pctByStock.index), Values)
plt.title('Percentage of number of days the stock price ' + increaseOrDecreaseText)
plt.xlabel('Percentage of days ')
# Write the rounded percentage inside each bar at a fixed x position.
for i, pct in enumerate(Values):
    plt.text(x= 40,y= i,s= str(round(pct))+'%', color='white')
plt.show()


In [None]:
# Horizontal bar chart of the column standard deviations of df, largest at the bottom.
plt.figure(figsize = (9,6))
stdByStock = df.std().sort_values(ascending = False)
Values = list(stdByStock)
# Labels come from the sorted series itself, so every bar is paired with its own stock and
# the cell no longer depends on a column_names variable left over from another cell.
plt.barh(list(stdByStock.index), Values)
plt.title('Standard Deviation of number of days the stock price ' + increaseOrDecreaseText)
plt.xlabel('Standard Deviation')
# Write the value (4 decimals) inside each bar at a fixed x position.
for i, stdValue in enumerate(Values):
    plt.text(x= 0.4,y= i,s= round(stdValue,4), color='white')
plt.show()

In [None]:
# Convert the 1/0 flags into True/False.
# astype(bool) guarantees a real boolean dtype for every column. Two chained replace()
# calls on float columns do not guarantee that, and a non-bool frame makes mlxtend's
# apriori emit a warning.
# This is only correct once missing values are gone (NaN would become True); the dropna
# step earlier in the notebook takes care of that.
df = df.astype(bool)
# The data is now preprocessed.
df.head()

In [None]:
# Mine the frequent itemsets with a user-supplied minimum support.
# float() is used instead of eval(): eval() would execute any Python expression typed at the prompt.
minSup = float(input("Please enter minimum Support value: "))
# Obtain the frequent itemsets; use_colnames=True reports stock names instead of column indices.
df_frequent = apriori(df, min_support = minSup, use_colnames = True, verbose = 1)
print('The Frequent itemsets with Minimum Support='+ str(minSup))
df_frequent.sort_values("support",ascending=False)


In [None]:
# Derive association rules from the frequent itemsets.
# Read the required minimum confidence.
# float() is used instead of eval(): eval() would execute any Python expression typed at the prompt.
minConf = float(input("Please enter minimum Confidence value: "))
df_ar = association_rules(df_frequent, metric = "confidence", min_threshold = minConf)
# Keep only the rules with lift > 1.
df_ar = df_ar[df_ar['lift'] > 1]
# Restrict the table to the columns that are reported and plotted.
df_ar = df_ar[['antecedents', 'consequents','antecedent support','consequent support','support','confidence','lift']]
print('The association rules with Minimum Support='+ str(minSup) + ' and Minimum Confidence='+ str(minConf) + ' and Lift>1')
df_ar.sort_values('lift', ascending = False)

In [None]:
# Scatter plot of rule support (x) against rule confidence (y).
rules_ax = sns.scatterplot(x='support', y='confidence', data=df_ar)
rules_ax.set_title('Support versus Confidence of Association Rules')
plt.show()

In [None]:
from mpl_toolkits import mplot3d

# 3D scatter plot of the association rules: support (x), confidence (y) and lift (z).
fig = plt.figure(figsize = (7,7))
ax = fig.add_subplot(projection='3d')
x, y, z = (df_ar[metric] for metric in ('support', 'confidence', 'lift'))
# Title and red axis captions
ax.set_title("3D Scatter plot of support-confidence-lift")
ax.set_xlabel('SUPPORT', color='red')
ax.set_ylabel('CONFIDENCE', color='red')
ax.set_zlabel('LIFT', color='red')
ax.scatter(x, y, z, color='green')
plt.show()

###### MUSA'S CODE STARTS HERE


In [None]:
# Get the thresholds first, so that the mining step itself can honour them.
min_support = float(input("Enter the minimum support: "))
min_confidence = float(input("Enter the minimum confidence: "))

# Extract frequent itemsets using the Apriori algorithm at the requested support.
# A fixed 0.01 threshold followed by filtering can generate a huge number of itemsets,
# and it silently loses every pattern when the user asks for a support below 0.01.
frequent_itemsets = apriori(df, min_support=min_support, use_colnames=True)

# Print frequent patterns
print("Frequent Patterns:")
print(frequent_itemsets)

# Generate association rules that reach the requested confidence
rules = association_rules(frequent_itemsets, metric="confidence", min_threshold=min_confidence)

# Every rule is built from an itemset that already satisfies min_support,
# so only the lift condition is left to apply here.
strong_rules = rules[rules['lift'] >= 1]

# Print strong association rules
print("\nStrong Association Rules:")
for _, row in strong_rules.iterrows():
    # antecedents / consequents are collections of stock names; join them for display
    antecedents = ', '.join(str(item) for item in row['antecedents'])
    consequents = ', '.join(str(item) for item in row['consequents'])
    confidence = row['confidence']
    print(f"{antecedents} -> {consequents} (Confidence: {confidence})")

# Generate scatterplot of Support versus Confidence
support_values = strong_rules['support']
confidence_values = strong_rules['confidence']

plt.figure(figsize=(8, 6))
plt.scatter(support_values, confidence_values)
plt.title('Support versus Confidence of Association Rules')
plt.xlabel('Support')
plt.ylabel('Confidence')
plt.show()