In [None]:
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.linear_model import LinearRegression
from sklearn.impute import SimpleImputer
import numpy as np
import joblib

# Load the historical price data.
data = pd.read_csv('MCB(MCB_Bank_Limited).csv')

# Parse 'TIME' into datetimes; the format expects a two-digit year (yy-mm-dd).
data['TIME'] = pd.to_datetime(data['TIME'], format='%y-%m-%d')

# Column to predict and the columns used to predict it.
target = 'CLOSE'
features = ['OPEN', 'HIGH', 'LOW', 'VOLUME']

# Fill missing feature values with the per-column mean.
# The result is a NumPy array with one row per row of `data`.
feature_imputer = SimpleImputer(strategy='mean')
X = feature_imputer.fit_transform(data[features])

# Fill missing target values the same way. SimpleImputer works on 2-D input,
# so the target is reshaped to a single column and flattened back afterwards.
target_imputer = SimpleImputer(strategy='mean')
y = target_imputer.fit_transform(data[target].values.reshape(-1, 1)).ravel()

# Group the rows into `n_clusters` clusters with k-means (fixed seed).
n_clusters = 3
kmeans = KMeans(n_clusters=n_clusters, random_state=42, n_init=10)
train_clusters = kmeans.fit_predict(X)

# One linear regression per cluster, keyed by cluster label and fitted only on
# the rows that k-means assigned to that cluster.
models = {
    label: LinearRegression().fit(X[train_clusters == label], y[train_clusters == label])
    for label in range(n_clusters)
}

# Persist the clustering model and every per-cluster regression.
joblib.dump(kmeans, 'kmeans_model.pkl')
for cluster, fitted_model in models.items():
    joblib.dump(fitted_model, f'linear_model_cluster_{cluster}.pkl')


def predict(X):
    """Predict a value for every row of X with the regression of its cluster.

    Each row is assigned to a cluster by the module-level `kmeans` model and
    then scored by the matching entry of the module-level `models` dict.

    Parameters
    ----------
    X : numpy.ndarray
        2-D feature array; it is indexed with a boolean row mask.

    Returns
    -------
    numpy.ndarray
        1-D float array of length ``len(X)`` holding one prediction per row.
    """
    assigned = kmeans.predict(X)
    results = np.zeros(len(X))
    for label in range(n_clusters):
        members = assigned == label
        # Skip clusters that received no rows; their model is not needed.
        if not members.any():
            continue
        results[members] = models[label].predict(X[members])
    return results

# --- Recursive forecast starting at the cutoff date --------------------------
cutoff_date = pd.Timestamp('2020-10-09')
forecast_days = 50  # number of consecutive calendar days to forecast

# Position (not index label) of the last row dated exactly `cutoff_date`.
# A position is required because it is used with `.iloc` and to index the
# NumPy feature matrix `X`, which has one row per row of `data`.
cutoff_positions = np.flatnonzero((data['TIME'] == cutoff_date).to_numpy())
if cutoff_positions.size == 0:
    raise ValueError(f"No row with TIME == {cutoff_date.date()} found in the data")
last_date_index = int(cutoff_positions[-1])

last_date = data['TIME'].iloc[last_date_index]

# Forecast rows are appended to a separate array. `np.vstack` always builds a
# new array and every row is copied before it is modified, so the training
# matrix `X` is never changed and re-running this cell gives the same result.
forecast_X = X

# The predicted close is fed forward as the next row's OPEN. It must go into a
# price column: the last feature column is VOLUME, which is not a price.
open_col = features.index('OPEN')

predicted_dates = []
predicted_values = []

for step in range(forecast_days):
    next_date = last_date + pd.Timedelta(days=1)

    # Row used as model input for this step. Because one row is appended per
    # iteration, `last_date_index + step` is always a valid row.
    # NOTE(review): if the CSV contains rows after the cutoff position, those
    # real rows are used as inputs here instead of the appended forecast rows -
    # confirm that this is intended. Dates advance by calendar day, so
    # weekends are included as well.
    next_data = forecast_X[last_date_index + step].copy().reshape(1, -1)
    next_prediction = predict(next_data)[0]

    # Record the forecast, rounded to two decimal places.
    predicted_dates.append(next_date)
    predicted_values.append(round(next_prediction, 2))

    # Advance the date and append the updated row for later steps.
    last_date = next_date
    next_data[0, open_col] = next_prediction
    forecast_X = np.vstack([forecast_X, next_data])

# Collect the forecast dates and values in a DataFrame.
predicted_df = pd.DataFrame({'DATE': predicted_dates, 'PREDICTED_CLOSE': predicted_values})

# The file name states the 50-day horizon; keep it in sync with `forecast_days`.
output_file = 'mcb_PredictedPrice_After9Oct2020Upto50Days.xlsx'
predicted_df.to_excel(output_file, index=False)

# Colab-only: hand the generated workbook to the browser for download.
from google.colab import files
files.download(output_file)


print(predicted_df)


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

         DATE  PREDICTED_CLOSE
0  2020-10-10           175.79
1  2020-10-11           174.10
2  2020-10-12           173.36
3  2020-10-13           171.76
4  2020-10-14           173.11
5  2020-10-15           175.04
6  2020-10-16           174.75
7  2020-10-17           174.55
8  2020-10-18           176.40
9  2020-10-19           173.67
10 2020-10-20           176.43
11 2020-10-21           175.84
12 2020-10-22           177.28
13 2020-10-23           176.27
14 2020-10-24           177.75
15 2020-10-25           178.94
16 2020-10-26           179.35
17 2020-10-27           178.08
18 2020-10-28           179.20
19 2020-10-29           177.70
20 2020-10-30           179.57
21 2020-10-31           178.31
22 2020-11-01           172.79
23 2020-11-02           171.10
24 2020-11-03           173.18
25 2020-11-04           173.10
26 2020-11-05           173.22
27 2020-11-06           172.55
28 2020-11-07           172.17
29 2020-11-08           172.20
30 2020-11-09           172.68
31 2020-

In [None]:
# Colab-only module for moving files between the browser and the runtime.
from google.colab import files
# Upload file(s) from the local machine and keep the result in `uploaded`.
# NOTE(review): the training cell above reads 'MCB(MCB_Bank_Limited).csv' with
# pd.read_csv, and this cell's recorded output shows that same file being
# saved here - so on a fresh runtime this cell presumably has to run first.
uploaded = files.upload()

Saving MCB(MCB_Bank_Limited).csv to MCB(MCB_Bank_Limited).csv
