In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.metrics import confusion_matrix, accuracy_score, recall_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split

In [None]:
# Read the Kickstarter projects CSV (path is relative to the notebook's
# working directory) and preview the first rows.
csv_path = "data/cleaned_encoded_kickstarter_projects.csv"
df = pd.read_csv(csv_path)
df.head()

In [None]:
# Scatter plot of the 'goal' column against 'pledged', coloured by 'state'.
# Both axes are switched to a logarithmic scale on the Axes that seaborn returns.
ax = sns.scatterplot(data=df, x='goal', y='pledged', hue='state')
ax.set_xscale('log')
ax.set_yscale('log')

In [None]:
# Replace 'goal' and 'pledged' with log(1 + x) of their values; log1p is used
# so that zero entries stay finite (log1p(0) == 0).
# NOTE: the columns are overwritten in place, so running this cell a second
# time applies the transform again on already-transformed values.
for column in ('goal', 'pledged'):
    df[column] = np.log1p(df[column])

In [None]:
# Re-plot the same two columns on linear axes to check the log transform visually.
ax = sns.scatterplot(data=df, x='goal', y='pledged', hue='state')

In [None]:
# Summary statistics from DataFrame.describe(), rounded to whole numbers for readability.
summary = df.describe()
summary.round(0)

In [None]:
# Correlation heatmap of the numeric columns.
correlations = df.corr(numeric_only=True)

# Boolean mask that hides the upper triangle (diagonal included) so each pair
# of columns is shown only once. The mask is built from a matrix of ones rather
# than from the correlation values themselves: a coefficient of exactly 0 in
# the upper triangle would otherwise be falsy and leak into the plot.
mask = np.triu(np.ones_like(correlations, dtype=bool))

# Fixed colour limits of -1..1 cover the full range a correlation can take;
# the trailing semicolon suppresses the Axes repr in the cell output.
sns.heatmap(correlations, vmax=1, vmin=-1, annot=True, mask=mask, cmap="YlGnBu");

## Simple KNN

In [None]:
# Single-feature setup: 'duration' is the only predictor.
# The target is the 'pledged' column cast to integers, so each distinct
# integer value is treated as a class label by the classifier below.
# NOTE(review): 'pledged' was log1p-transformed in an earlier cell, so these
# integer classes are truncated log-amounts - confirm this is the intended target.
features = ['duration']
X = df[features]
y = df['pledged'].astype(int)

In [None]:
# Hold out a test set. random_state pins the shuffle so the split is
# reproducible; test_size=0.25 is scikit-learn's default written out explicitly.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=1
)

In [None]:
# Fit a k-nearest-neighbours classifier on the training split.
# n_neighbors=5 is the library default, spelled out here for the reader.
knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(X_train, y_train)

In [None]:
# Predict class labels for the held-out rows.
y_pred = knn.predict(X_test)

# Report test accuracy to two decimals. The built-in round() is used instead
# of the .round() method: .round() only exists when the metric comes back as a
# NumPy scalar and raises AttributeError on a plain Python float, whereas
# round() accepts both.
print("Accuracy:", round(accuracy_score(y_test, y_pred), 2))

## KNN with more features

In [None]:
# Multi-feature setup: same target as the single-feature model, but with
# several predictor columns.
# NOTE(review): these columns are used unscaled; KNN is distance-based, so
# columns with larger numeric ranges may dominate - consider standardising.
X = df[['duration','launch_month','category_encoded','country_encoded','goal']]
y = df['pledged'].astype('int')

# Split into train and test sets; random_state pins the shuffle for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)

# Fit a k-nearest-neighbours classifier with default settings.
knn = KNeighborsClassifier()
knn.fit(X_train, y_train)

# Predict class labels for the held-out rows.
y_pred = knn.predict(X_test)

# Report test accuracy to two decimals. The built-in round() is used instead
# of the .round() method: .round() only exists when the metric comes back as a
# NumPy scalar and raises AttributeError on a plain Python float, whereas
# round() accepts both.
print("Accuracy:", round(accuracy_score(y_test, y_pred), 2))