# Bitcoin Price Range History 20250901

This data is in CSV format and has around 2 million rows. The dataset has the following columns:

1. Start : Date (Indicates the start date of the data record) (Format YYYY-MM-DD) (NUMERIC)

2. End : Date (Indicates the end date of the data record) (Format YYYY-MM-DD) (NUMERIC)

3. Open : Number (The price at which Bitcoin started trading at the beginning of the day.) (NUMERIC)

4. High : Number (The highest price point reached by Bitcoin during the day.) (NUMERIC)

5. Low : Number (The lowest price point reached by Bitcoin during the day.) (NUMERIC)

6. Close : Number (The price at which Bitcoin ended trading at the close of the day.) (NUMERIC)

7. Volume : Number (Total volume of Bitcoin traded during the day.) 

8. Market Cap : Number (The total market value of Bitcoin at the end of the day.) 

In [21]:
# Imports
import pandas as pd
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
import sklearn


from reinforcement.sarsa import SARSAWumpus
from reinforcement.qLearning import QLearningWumpus
from supervisedLearning.ann import ArtificialNeuralNetwork
from supervisedLearning.knn import KNearestNeigbor
from supervisedLearning.regression import PolynomialRegression
from supervisedLearning.svm import SupportVectorMachine
from unsupervisedLearning.dbscan import DBScan
from unsupervisedLearning.kMeans import KMeans
from unsupervisedLearning.pca import PCA

In [None]:
# Load the two CSV files used by this notebook into DataFrames.
# The paths are relative, so they resolve against the current working directory.
dataset1 = "../data/bitcoin_2010-07-17_2024-06-28.csv"
dataset2 = "../data/iris.csv"

# df1 is read first, then df2 (same order as before).
df1, df2 = (pd.read_csv(csv_path) for csv_path in (dataset1, dataset2))

# Exploratory Data Analysis

In [None]:
# Amount of data: number of non-null entries in each column of df1
non_null_counts = df1.count()
print(non_null_counts)

## Mean

In [None]:
# Mean of every column.
#
# Start/End are date-only strings ("%Y-%m-%d"), so their time-of-day parts
# are always 0 and a column of `datetime.date` objects has no mean. Both
# columns are therefore summarised by day of month, month and year.
start_dt = pd.to_datetime(df1["Start"], format="%Y-%m-%d")
end_dt = pd.to_datetime(df1["End"], format="%Y-%m-%d")

# start: day of month, month, year
print(start_dt.dt.day.mean())
print(start_dt.dt.month.mean())
print(start_dt.dt.year.mean())

# end: day of month, month, year
print(end_dt.dt.day.mean())
print(end_dt.dt.month.mean())
print(end_dt.dt.year.mean())

# price, volume and market-cap columns
for column in ("Open", "High", "Low", "Close", "Volume", "Market Cap"):
    print(df1[column].mean())



## Standard Deviation

In [None]:
# Standard deviation of every column.
#
# Start/End are date-only strings ("%Y-%m-%d"), so their time-of-day parts
# are always 0 and `.dt.date` (python date objects) has no standard
# deviation. Both columns are summarised by day of month, month and year.
start_dt = pd.to_datetime(df1["Start"], format="%Y-%m-%d")
end_dt = pd.to_datetime(df1["End"], format="%Y-%m-%d")

# start: day of month, month, year
print(start_dt.dt.day.std())
print(start_dt.dt.month.std())
print(start_dt.dt.year.std())

# end: day of month, month, year
print(end_dt.dt.day.std())
print(end_dt.dt.month.std())
print(end_dt.dt.year.std())

# price, volume and market-cap columns
for column in ("Open", "High", "Low", "Close", "Volume", "Market Cap"):
    print(df1[column].std())

## Minimum

In [None]:
# Minimum of every column.
#
# Start/End are date-only strings ("%Y-%m-%d"); hour/minute/second would
# always be 0, so Start is summarised the same way as End: earliest
# calendar date, smallest month number, smallest year.
start_dt = pd.to_datetime(df1["Start"], format="%Y-%m-%d")
end_dt = pd.to_datetime(df1["End"], format="%Y-%m-%d")

# start: earliest date, month, year
print(start_dt.dt.date.min())
print(start_dt.dt.month.min())
print(start_dt.dt.year.min())

# end: earliest date, month, year
print(end_dt.dt.date.min())
print(end_dt.dt.month.min())
print(end_dt.dt.year.min())

# price, volume and market-cap columns
for column in ("Open", "High", "Low", "Close", "Volume", "Market Cap"):
    print(df1[column].min())

## Maximum

In [None]:
# Maximum of every column.
#
# Start/End are date-only strings ("%Y-%m-%d"); hour/minute/second would
# always be 0, so Start is summarised the same way as End: latest calendar
# date, largest month number, largest year.
start_dt = pd.to_datetime(df1["Start"], format="%Y-%m-%d")
end_dt = pd.to_datetime(df1["End"], format="%Y-%m-%d")

# start: latest date, month, year
print(start_dt.dt.date.max())
print(start_dt.dt.month.max())
print(start_dt.dt.year.max())

# end: latest date, month, year
print(end_dt.dt.date.max())
print(end_dt.dt.month.max())
print(end_dt.dt.year.max())  # previously printed the minimum year by mistake

# price, volume and market-cap columns
for column in ("Open", "High", "Low", "Close", "Volume", "Market Cap"):
    print(df1[column].max())

## Quantile

In [None]:
# Quartiles (25%, 50%, 75%) of every column.
#
# Start/End are date-only strings ("%Y-%m-%d"): their time-of-day parts are
# always 0 and `.dt.date` (python date objects) cannot be interpolated, so
# both columns are summarised by day of month, month and year.
QUARTILES = [0.25, 0.5, 0.75]

start_dt = pd.to_datetime(df1["Start"], format="%Y-%m-%d")
end_dt = pd.to_datetime(df1["End"], format="%Y-%m-%d")

# start: day of month, month, year
print(start_dt.dt.day.quantile(QUARTILES))
print(start_dt.dt.month.quantile(QUARTILES))
print(start_dt.dt.year.quantile(QUARTILES))

# end: day of month, month, year
print(end_dt.dt.day.quantile(QUARTILES))
print(end_dt.dt.month.quantile(QUARTILES))
print(end_dt.dt.year.quantile(QUARTILES))

# price, volume and market-cap columns
for column in ("Open", "High", "Low", "Close", "Volume", "Market Cap"):
    print(df1[column].quantile(QUARTILES))

# Data distribution check

## Numeric

In [None]:
# Distribution check: one histogram (with KDE) and one boxplot per variable.
#
# Fixes compared with the previous version of this cell:
#   * `errors="coerce"` is a pd.to_datetime option, not a seaborn one, so it
#     is no longer passed to sns.boxplot.
#   * the day of month (`.dt.day`) is plotted instead of `.dt.date`, which
#     yields python date objects rather than numbers.

# Date columns, each with the histogram bin count used for it.
for column, bins in (("Start", 24), ("End", 60)):
    parsed = pd.to_datetime(df1[column], format="%Y-%m-%d", errors="coerce")
    for part in ("year", "month", "day"):
        values = getattr(parsed.dt, part)

        # Histogram
        sns.histplot(values, bins=bins, kde=True)
        plt.title(f"{column} ({part})")
        plt.show()

        # Boxplot
        sns.boxplot(x=values)
        plt.title(f"{column} ({part})")
        plt.show()

# Price, volume and market-cap columns.
for column in ("Open", "High", "Low", "Close", "Volume", "Market Cap"):
    # Histogram
    sns.histplot(df1[column], bins=60, kde=True)
    plt.show()

    # Boxplot
    sns.boxplot(x=df1[column])
    plt.show()


## Outlier Check (Z-value check)

## Numerical

### Start

In [None]:
# Z-score outlier check for the Start date, one pass per date component.
# A row counts as an outlier when |z| > 3.
# The day of month is taken with `.dt.day`; `.dt.date` returns python date
# objects, for which mean/std are not defined.
start_dt = pd.to_datetime(df1["Start"], format='%Y-%m-%d', errors='coerce')

for part in ("year", "month", "day"):
    values = getattr(start_dt.dt, part)
    mean = values.mean()
    std = values.std()

    z_score = (values - mean) / std
    outliers = df1[z_score.abs() > 3]
    print(f"Outliers count {len(outliers)}")

### End

In [None]:
# Z-score outlier check for the End date, one pass per date component.
# A row counts as an outlier when |z| > 3.
# The day of month is taken with `.dt.day`; `.dt.date` returns python date
# objects, for which mean/std are not defined.
end_dt = pd.to_datetime(df1["End"], format='%Y-%m-%d', errors='coerce')

for part in ("year", "month", "day"):
    values = getattr(end_dt.dt, part)
    mean = values.mean()
    std = values.std()

    z_score = (values - mean) / std
    outliers = df1[z_score.abs() > 3]
    print(f"Outliers count {len(outliers)}")

### Open

In [None]:
# Z-score outlier check for "Open": count rows with |z| > 3.
open_col = df1["Open"]
mean, std = open_col.mean(), open_col.std()

z_score = open_col.sub(mean).div(std)
outliers = df1.loc[z_score.abs().gt(3)]
print(f"Outliers count {len(outliers)}")


### High

In [None]:
# Z-score outlier check for "High": count rows with |z| > 3.
high_col = df1["High"]
mean, std = high_col.mean(), high_col.std()

z_score = high_col.sub(mean).div(std)
outliers = df1.loc[z_score.abs().gt(3)]
print(f"Outliers count {len(outliers)}")


### Low

In [None]:
# Z-score outlier check for "Low": count rows with |z| > 3.
low_col = df1["Low"]
mean, std = low_col.mean(), low_col.std()

z_score = low_col.sub(mean).div(std)
outliers = df1.loc[z_score.abs().gt(3)]
print(f"Outliers count {len(outliers)}")


### Close

In [None]:
# Z-score outlier check for "Close": count rows with |z| > 3.
close_col = df1["Close"]
mean, std = close_col.mean(), close_col.std()

z_score = close_col.sub(mean).div(std)
outliers = df1.loc[z_score.abs().gt(3)]
print(f"Outliers count {len(outliers)}")


### Volume

In [None]:
# Z-score outlier check for "Volume": count rows with |z| > 3.
volume_col = df1["Volume"]
mean, std = volume_col.mean(), volume_col.std()

z_score = volume_col.sub(mean).div(std)
outliers = df1.loc[z_score.abs().gt(3)]
print(f"Outliers count {len(outliers)}")


### Market Cap

In [None]:
# Z-score outlier check for "Market Cap": count rows with |z| > 3.
market_cap_col = df1["Market Cap"]
mean, std = market_cap_col.mean(), market_cap_col.std()

z_score = market_cap_col.sub(mean).div(std)
outliers = df1.loc[z_score.abs().gt(3)]
print(f"Outliers count {len(outliers)}")


# Data preprocessing
This stage includes data cleaning (handling missing values, removing duplicates, and correcting errors and inconsistencies), data transformation (normalization, standardization, categorical data encoding),
feature selection, and dimensionality reduction. Missing values are handled in the categorical features first: if a null is found, it is replaced with the mode of that feature.
If an outlier is found, it is removed from the dataset. In the numerical features, every null value is replaced with the mean value of that feature. If an outlier is found,
it is removed from the dataset.

## Data Cleaning

### Start

In [None]:
# Replace missing Start values with the "mean date" built from the mean
# year, month and day of month of the valid rows.
#
# The replacement is written as YYYY-MM-DD because every other cell parses
# this column with format="%Y-%m-%d"; any other layout would be coerced to
# NaT later on. The day of month comes from `.dt.day` (`.dt.date` returns
# python date objects, which cannot be averaged).
start_dt = pd.to_datetime(df1["Start"], format="%Y-%m-%d", errors="coerce")
year_mean = int(start_dt.dt.year.mean())
month_mean = int(start_dt.dt.month.mean())
date_mean = int(start_dt.dt.day.mean())
datetime_mean = f"{year_mean:04d}-{month_mean:02d}-{date_mean:02d}"

# Both the literal "(null)" placeholder and real NaN cells count as missing.
start_missing = df1["Start"].isna() | (df1["Start"] == "(null)")
df1.loc[start_missing, "Start"] = datetime_mean

### End

In [None]:
# Replace missing End values with the "mean date" built from the mean
# year, month and day of month of the valid rows.
#
# The replacement is written as YYYY-MM-DD because every other cell parses
# this column with format="%Y-%m-%d"; any other layout would be coerced to
# NaT later on. The day of month comes from `.dt.day` (`.dt.date` returns
# python date objects, which cannot be averaged).
end_dt = pd.to_datetime(df1["End"], format="%Y-%m-%d", errors="coerce")
year_mean = int(end_dt.dt.year.mean())
month_mean = int(end_dt.dt.month.mean())
date_mean = int(end_dt.dt.day.mean())
datetime_mean = f"{year_mean:04d}-{month_mean:02d}-{date_mean:02d}"

# Both the literal "(null)" placeholder and real NaN cells count as missing.
end_missing = df1["End"].isna() | (df1["End"] == "(null)")
df1.loc[end_missing, "End"] = datetime_mean

### Open

In [None]:
# Replace missing "Open" values with the column mean.
# The column is converted to numbers first: while it still contains the
# "(null)" placeholder it is a string column and its mean cannot be taken.
# Any non-numeric entry (including "(null)") becomes NaN and is then filled.
open_numeric = pd.to_numeric(df1["Open"], errors="coerce")
mean = open_numeric.mean()
df1["Open"] = open_numeric.fillna(mean)

### High

In [None]:
# Replace missing "High" values with the column mean.
# The column is converted to numbers first: while it still contains the
# "(null)" placeholder it is a string column and its mean cannot be taken.
# Any non-numeric entry (including "(null)") becomes NaN and is then filled.
high_numeric = pd.to_numeric(df1["High"], errors="coerce")
mean = high_numeric.mean()
df1["High"] = high_numeric.fillna(mean)

### Low

In [None]:
# Replace missing "Low" values with the column mean.
# The column is converted to numbers first: while it still contains the
# "(null)" placeholder it is a string column and its mean cannot be taken.
# Any non-numeric entry (including "(null)") becomes NaN and is then filled.
low_numeric = pd.to_numeric(df1["Low"], errors="coerce")
mean = low_numeric.mean()
df1["Low"] = low_numeric.fillna(mean)

### Close

In [None]:
# Replace missing "Close" values with the column mean.
# The column is converted to numbers first: while it still contains the
# "(null)" placeholder it is a string column and its mean cannot be taken.
# Any non-numeric entry (including "(null)") becomes NaN and is then filled.
close_numeric = pd.to_numeric(df1["Close"], errors="coerce")
mean = close_numeric.mean()
df1["Close"] = close_numeric.fillna(mean)

### Volume

In [None]:
# Replace missing "Volume" values with the column mean.
# The column is converted to numbers first: while it still contains the
# "(null)" placeholder it is a string column and its mean cannot be taken.
# Any non-numeric entry (including "(null)") becomes NaN and is then filled.
volume_numeric = pd.to_numeric(df1["Volume"], errors="coerce")
mean = volume_numeric.mean()
df1["Volume"] = volume_numeric.fillna(mean)

### Market cap

In [None]:
# Replace missing "Market Cap" values with the column mean.
# The column is converted to numbers first: while it still contains the
# "(null)" placeholder it is a string column and its mean cannot be taken.
# Any non-numeric entry (including "(null)") becomes NaN and is then filled.
market_cap_numeric = pd.to_numeric(df1["Market Cap"], errors="coerce")
mean = market_cap_numeric.mean()
df1["Market Cap"] = market_cap_numeric.fillna(mean)

# Data Encoding
Encoding is needed so that the values can be processed by algorithms that operate on numbers (KNN and many others). In this case, only the dates in the Start and End columns need to be converted into a continuous value (in seconds, counted from year 0).

In [None]:
# Encode the Start and End dates as a continuous number of seconds.
#
# The previous arithmetic (year * 365 days + month * 30 days + day) was not
# one-to-one: January 31 and February 1 of the same year received the same
# value, and leap years were ignored. Counting real calendar days keeps the
# encoding strictly increasing and evenly spaced.
#
# The value is the proleptic Gregorian ordinal (0001-01-01 is day 1)
# multiplied by the number of seconds in a day. Unparseable dates are
# coerced to NaT and end up as NaN.
SECONDS_PER_DAY = 24 * 60 * 60
UNIX_EPOCH = pd.Timestamp("1970-01-01")
UNIX_EPOCH_ORDINAL = UNIX_EPOCH.toordinal()

for column in ("Start", "End"):
    parsed = pd.to_datetime(df1[column], format="%Y-%m-%d", errors="coerce")
    day_ordinal = (parsed - UNIX_EPOCH).dt.days + UNIX_EPOCH_ORDINAL
    df1[column] = day_ordinal * SECONDS_PER_DAY

## Feature Selection
Feature selection keeps only the relevant columns, i.e. all columns except the end date. For SVM processing, a new column is added that stores the year and month in year-month (YYYY-MM) format.

In [None]:
# Generating new column to period
df1["Start"] = pd.to_datetime(df1["Start"], format="%Y-%m-%d")

# Extract year and month in YYYY-MM format
df1["Period"] = df1["Start"].dt.strftime("%Y-%m")

In [None]:
# Column list: every entry except the last is a feature, the last one
# ("Period") is the class label.
features = ["Start", "Open", "High", "Low", "Close", "Market Cap", "Volume", "Period"]

*feature_columns, label_column = features
X = df1.loc[:, feature_columns]
Y = df1.loc[:, label_column]

# Distinct class labels, in order of first appearance
classes = Y.unique()
print(classes)

## Dimensionality Reduction
Dimensionality reduction is done to reduce noise in the attributes and to
focus on the patterns behind the observed events. In this example it is not needed, because every value is already in a simple form and there is no intention to derive a new column that is significant for a particular target label.

## Balancing
Based on the diagrams in the EDA, the data is skewed along the time axis, but
apart from that the variance is moderate (not too significant), so the model generated from this dataset is not expected to be biased. Thus, it does not need to be balanced with
any balancing method.

# Bagian 2 (Supervised Learning)

## K Nearest Neighbor

#### Hold out validation

##### Implementasi Manual

In [None]:
# Hold-out split: the first 80% of the rows are used for training, the
# remaining 20% are kept for testing.
nTrain = int(0.8 * len(df1))
trainData = df1.iloc[:nTrain][features]
testData = df1.iloc[nTrain:][features]

# Query sample, entered in the same order as `features`:
# "Start", "Open", "High", "Low", "Close", "Market Cap", "Volume", "Period"
startVal = float(input("Start (encoded date, in seconds): "))
openVal = float(input("Open: "))
highVal = float(input("High: "))
lowVal = float(input("Low: "))
closeVal = float(input("Close: "))
marketCapVal = float(input("Market Cap: "))
# Volume was missing from the query although it is one of the training
# columns, which left the query one value short of `features`.
volumeVal = float(input("Volume: "))
# Period must be one of the monthly labels present in the data
# ('2010-07' through '2024-06', see the printed `classes`).
periodVal = input("Period (YYYY-MM, between 2010-07 and 2024-06): ")

# Model settings
distanceFunction = input("Distance function: ")
neighborCount = int(input("Number of neighbours: "))
minkowskiExp = float(input("Minkowski exponent: "))
# NOTE(review): KNearestNeigbor is project code that is not visible here.
# The call is unchanged; "Start" is presumably the target column - confirm.
knn = KNearestNeigbor(distanceFunction, neighborCount, "Start", minkowskiExp)

# predict
# dtype=object keeps the numbers as floats. Without it NumPy converts every
# entry to a string, because periodVal is a string.
x = np.array(
    [startVal, openVal, highVal, lowVal, closeVal, marketCapVal, volumeVal, periodVal],
    dtype=object,
)
knn.predict(trainData, x)

##### Implementasi Library

In [None]:
# Hold-out validation with scikit-learn's KNN regressor.
# Relies on trainData / testData and on the query values and model settings
# entered in the manual hold-out cell.
from sklearn.neighbors import KNeighborsRegressor

# Predictors: the numeric columns for which a query value was entered.
# "Period" is left out because it is a string (the regressor only accepts
# numbers) and is derived from the target "Start"; "Volume" is left out
# because no query value is collected for it. This keeps the training
# columns and the query sample the same width.
numericFeatures = ["Open", "High", "Low", "Close", "Market Cap"]

# X = features, y = target column
X_train = trainData[numericFeatures]
y_train = trainData["Start"]

knn = KNeighborsRegressor(n_neighbors=neighborCount, metric='minkowski', p=minkowskiExp)
knn.fit(X_train, y_train)

# Evaluate on the held-out 20%; score() returns the R^2 of the prediction.
print("Hold-out R^2:", knn.score(testData[numericFeatures], testData["Start"]))

# Predict for a new sample. A DataFrame with the training column names
# keeps the feature order explicit.
x_new = pd.DataFrame(
    [[openVal, highVal, lowVal, closeVal, marketCapVal]],
    columns=numericFeatures,
)
predicted = knn.predict(x_new)
print("Predicted value:", predicted)


#### K fold validation

##### Implementasi Manual

In [None]:
# K-fold cross-validation with the hand-written KNN.
# partition into k = user input
k = int(input("Number of folds: "))
n = len(df1)
if not 2 <= k <= n:
    # k = 0 would divide by zero, k = 1 leaves no training rows and
    # k > n produces empty folds.
    raise ValueError(f"Number of folds must be between 2 and {n}, got {k}")
fold_size = n // k

# The query sample and the model settings do not depend on the fold, so
# they are read once instead of being asked again on every iteration.
# Order of the query values follows `features`:
# "Start", "Open", "High", "Low", "Close", "Market Cap", "Volume", "Period"
startVal = float(input("Start (encoded date, in seconds): "))
openVal = float(input("Open: "))
highVal = float(input("High: "))
lowVal = float(input("Low: "))
closeVal = float(input("Close: "))
marketCapVal = float(input("Market Cap: "))
# Volume was missing from the query although it is one of the training
# columns, which left the query one value short of `features`.
volumeVal = float(input("Volume: "))
# Period must be one of the monthly labels present in the data
# ('2010-07' through '2024-06', see the printed `classes`).
periodVal = input("Period (YYYY-MM, between 2010-07 and 2024-06): ")

distanceFunction = input("Distance function: ")
neighborCount = int(input("Number of neighbours: "))
minkowskiExp = float(input("Minkowski exponent: "))

# dtype=object keeps the numbers as floats. Without it NumPy converts every
# entry to a string, because periodVal is a string.
x = np.array(
    [startVal, openVal, highVal, lowVal, closeVal, marketCapVal, volumeVal, periodVal],
    dtype=object,
)

for i in range(k):
    start_idx = i * fold_size
    end_idx = (i + 1) * fold_size if i != k - 1 else n  # last fold may include remainder
    testData = df1.iloc[start_idx:end_idx][features]

    # Train data: every row outside the test fold, selected by position
    trainData = pd.concat([df1.iloc[:start_idx], df1.iloc[end_idx:]])[features]

    # NOTE(review): KNearestNeigbor is project code that is not visible
    # here. The call is unchanged; "Start" is presumably the target
    # column - confirm.
    knn = KNearestNeigbor(distanceFunction, neighborCount, "Start", minkowskiExp)

    # predict
    prediction = knn.predict(trainData, x)

    print(f"Fold {i+1} Prediction: {prediction}")

##### Implementasi Library

In [None]:
# K-fold cross-validation with scikit-learn's KNN regressor.
# Relies on the query values and model settings entered in the manual cells.
from sklearn.neighbors import KNeighborsRegressor

# Predictors: the numeric columns for which a query value was entered.
# "Period" is left out because it is a string (the regressor only accepts
# numbers) and is derived from the target "Start"; "Volume" is left out
# because no query value is collected for it. This keeps the training
# columns and the query sample the same width.
numericFeatures = ["Open", "High", "Low", "Close", "Market Cap"]

# partition into k = user input
k = int(input("Number of folds: "))
n = len(df1)
if not 2 <= k <= n:
    # k = 0 would divide by zero, k = 1 leaves no training rows and
    # k > n produces empty folds.
    raise ValueError(f"Number of folds must be between 2 and {n}, got {k}")
fold_size = n // k

# The query sample is the same for every fold.
x_new = pd.DataFrame(
    [[openVal, highVal, lowVal, closeVal, marketCapVal]],
    columns=numericFeatures,
)

for i in range(k):
    start_idx = i * fold_size
    end_idx = (i + 1) * fold_size if i != k - 1 else n  # last fold may include remainder
    testData = df1.iloc[start_idx:end_idx][features]

    # Train data: every row outside the test fold, selected by position
    trainData = pd.concat([df1.iloc[:start_idx], df1.iloc[end_idx:]])[features]

    # X = features, y = target column
    X_train = trainData[numericFeatures]
    y_train = trainData["Start"]

    knn = KNeighborsRegressor(n_neighbors=neighborCount, metric='minkowski', p=minkowskiExp)
    knn.fit(X_train, y_train)

    # Evaluate on this fold's held-out rows; score() returns the R^2.
    print(f"Fold {i + 1} R^2:", knn.score(testData[numericFeatures], testData["Start"]))

    # Predict for a new sample
    predicted = knn.predict(x_new)
    print(f"Fold {i + 1} predicted value:", predicted)


## Polynomial Regression

#### Hold out validation

##### Implementasi Manual

In [None]:
# partition into 80% and 20% (testing)

##### Implementasi Library

In [None]:
# partition into 80% and 20% (testing)

#### K fold validation

##### Implementasi Manual

In [None]:
# partition into k folds (train on k-1 folds, test on the remaining one)

##### Implementasi Library

In [None]:
# partition into k folds (train on k-1 folds, test on the remaining one)

## Regression Tree (CART)

#### Hold out validation

##### Implementasi Manual

In [None]:
# partition into 80% and 20% (testing)

##### Implementasi Library

In [None]:
# partition into 80% and 20% (testing)

#### K fold validation

##### Implementasi Manual

In [None]:
# partition into k folds (train on k-1 folds, test on the remaining one)

##### Implementasi Library

In [None]:
# partition into k folds (train on k-1 folds, test on the remaining one)

## Support Vector Machine (SVM)

#### Hold out validation

##### Implementasi Manual

In [None]:
# partition into 80% and 20% (testing)

##### Implementasi Library

In [None]:
# partition into 80% and 20% (testing)

#### K fold validation

##### Implementasi Manual

In [None]:
# partition into k folds (train on k-1 folds, test on the remaining one)

##### Implementasi Library

In [None]:
# partition into k folds (train on k-1 folds, test on the remaining one)

## Artificial Neural Network (ANN)

#### Hold out validation

##### Implementasi Manual

In [None]:
# partition into 80% and 20% (testing)

##### Implementasi Library

In [None]:
# partition into 80% and 20% (testing)

#### K fold validation

##### Implementasi Manual

In [None]:
# partition into k folds (train on k-1 folds, test on the remaining one)

##### Implementasi Library

In [None]:
# partition into k folds (train on k-1 folds, test on the remaining one)

# Bagian 3 (Unsupervised Learning)

Bagian ini akan menggunakan data iris.csv yang terletak pada folder data dengan kolom sebagai berikut: 
1. sepal_width : numerical
2. sepal_length : numerical
3. petal_width : numerical
4. petal_length : numerical
5. class : categorical

## K Means

### Implementasi manual

### Implementasi Library

## DBSCAN

### Implementasi manual

### Implementasi Library

## PCA

### Implementasi manual

### Implementasi Library

# Bagian 4 (Reinforcement Learning)

## SARSA

## Q Learning