In [3]:
import numpy as np
import pandas as pd

In [4]:
data = pd.read_csv('./first_year_returns_data_normalized.csv')

In [5]:
column_to_drop=['cluster', 'returns_1yr']
first_year=data.drop(columns=column_to_drop)

In [6]:
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score

kmeans = KMeans(n_clusters=10)  

kmeans.fit(first_year)

cluster_labels = kmeans.labels_

silhouette_avg = silhouette_score(first_year, cluster_labels)

first_year['cluster'] = cluster_labels

  super()._check_params_vs_input(X, default_n_init=10)


In [7]:
first_year

Unnamed: 0,expense_ratio,risk_level,cluster
0,0.083004,0.4,5
1,0.118577,0.0,7
2,0.185771,0.8,6
3,0.217391,1.0,0
4,0.438735,1.0,8
...,...,...,...
620,0.106719,0.2,1
621,0.150198,0.4,5
622,0.355731,0.6,9
623,0.434783,1.0,8


In [8]:
first_year.to_csv('first_year.csv', index=False)

In [9]:
scheme_codes = pd.read_csv('./scheme_codes.csv')

In [10]:
first_year_scheme_codes=pd.merge(first_year, scheme_codes, left_index=True, right_index=True)

In [11]:
first_year_scheme_codes.to_csv('test.csv', index=False)

In [12]:
first_year1=first_year_scheme_codes.copy()

In [13]:
first_year1

Unnamed: 0,expense_ratio,risk_level,cluster,scheme_code
0,0.083004,0.4,5,100033
1,0.118577,0.0,7,100034
2,0.185771,0.8,6,100037
3,0.217391,1.0,0,100038
4,0.438735,1.0,8,100041
...,...,...,...,...
620,0.106719,0.2,1,100800
621,0.150198,0.4,5,100801
622,0.355731,0.6,9,100802
623,0.434783,1.0,8,100803


In [14]:
def get_cluster_number(scheme_code):
    cluster_number = first_year_scheme_codes.loc[first_year_scheme_codes['scheme_code'] == scheme_code, 'cluster'].values
    if len(cluster_number) > 0:        
        return cluster_number[0]
    else:
        return None

In [15]:
import numpy as np
import pandas as pd

df = pd.DataFrame()

def compute_average_returns():
    global df
    returns = pd.read_csv('./Predicted_returns.csv')
    df = pd.merge(first_year1, returns, left_index=True, right_index=True)
    avg_returns_by_cluster = df.groupby('cluster')['returns_1yr'].sum() / df.groupby('cluster')['returns_1yr'].count()
    avg_returns = avg_returns_by_cluster.reset_index()
    avg_returns.columns = ['cluster_no', 'avg']
    return avg_returns

class MutualFundClusterRecommendation:
    def __init__(self, avg_returns):
        self.avg_returns = avg_returns
        self.num_clusters = len(avg_returns)
        self.cluster_rewards = np.zeros(self.num_clusters)
        self.cluster_counts = np.zeros(self.num_clusters)
        self.cumulative_rewards = np.zeros(self.num_clusters)
    
    def update_rewards(self, cluster_idx, reward, invest_in_cluster=False):
        if invest_in_cluster:
            self.cluster_rewards[cluster_idx] += reward * 2  # Double reward if user invests in the cluster
        else:
            self.cluster_rewards[cluster_idx] += reward
    
    def choose_best_cluster(self):
        return np.argmax(self.avg_returns['avg'])
        
    def update_cumulative_rewards(self):
        self.cumulative_rewards += self.cluster_rewards
    
    def recommend_best_cluster(self):
        best_cluster = np.argmax(self.cumulative_rewards)
        return best_cluster

# Sample DataFrame for demonstration

avg_returns = compute_average_returns()
recommendation_system = MutualFundClusterRecommendation(avg_returns)

# Simulating 10 days
for day in range(4):
    
    # For demonstration, let's assume the user invests in the second cluster every day
    scheme_code = int(input("Enter scheme code you want to invest, enter -1 if you don't want to: "))
    if scheme_code != -1:
        cluster_number = get_cluster_number(scheme_code)
        recommendation_system.update_rewards(cluster_number, 1, invest_in_cluster=True)
    else:
        best_cluster = recommendation_system.choose_best_cluster()
        recommendation_system.update_rewards(best_cluster, 1)
    
    # Updating cumulative rewards
    recommendation_system.update_cumulative_rewards()

# Recommending the best cluster after 4 days
best_cluster_recommendation = recommendation_system.recommend_best_cluster()
print("Recommended cluster after 4 days:", best_cluster_recommendation)

Enter scheme code you want to invest, enter -1 if you don't want to:  104483
Enter scheme code you want to invest, enter -1 if you don't want to:  -1
Enter scheme code you want to invest, enter -1 if you don't want to:  100286
Enter scheme code you want to invest, enter -1 if you don't want to:  -1


Recommended cluster after 4 days: 7


In [17]:
df.to_csv('df.csv', index=False)

In [18]:
# Assuming you have a DataFrame named df with columns 'cluster', 'returns_1yr', and 'scheme_code'

def get_top_scheme_codes(df, cluster):
    # Filter the DataFrame for the given cluster
    cluster_df = df[df['cluster'] == cluster]
    
    # Sort the DataFrame by returns_1yr in descending order
    sorted_cluster_df = cluster_df.sort_values(by='returns_1yr', ascending=False)
    
    # Get the top 5 scheme_codes
    top_5_scheme_codes = sorted_cluster_df.head(5)['scheme_code'].tolist()
    
    return top_5_scheme_codes

# Example usage:
cluster = best_cluster_recommendation
top_scheme_codes = get_top_scheme_codes(df, cluster)
print(top_scheme_codes)

[104483, 109935, 100638, 100286, 100034]
