In [108]:
import numpy as np
import pandas as pd
import tensorflow as tf
import plotly.express as px

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.utils import class_weight


In [109]:
# Load the Kickstarter projects export from the Kaggle input mount.
data = pd.read_csv(
    filepath_or_buffer='/kaggle/input/kickstarter-projects/ks-projects-201801.csv'
)

In [110]:
# Rich display of the freshly loaded frame; pandas elides the middle rows.
data

Unnamed: 0,ID,name,category,main_category,currency,deadline,goal,launched,pledged,state,backers,country,usd pledged,usd_pledged_real,usd_goal_real
0,1000002330,The Songs of Adelaide & Abullah,Poetry,Publishing,GBP,2015-10-09,1000.0,2015-08-11 12:12:28,0.0,failed,0,GB,0.0,0.0,1533.95
1,1000003930,Greeting From Earth: ZGAC Arts Capsule For ET,Narrative Film,Film & Video,USD,2017-11-01,30000.0,2017-09-02 04:43:57,2421.0,failed,15,US,100.0,2421.0,30000.00
2,1000004038,Where is Hank?,Narrative Film,Film & Video,USD,2013-02-26,45000.0,2013-01-12 00:20:50,220.0,failed,3,US,220.0,220.0,45000.00
3,1000007540,ToshiCapital Rekordz Needs Help to Complete Album,Music,Music,USD,2012-04-16,5000.0,2012-03-17 03:24:11,1.0,failed,1,US,1.0,1.0,5000.00
4,1000011046,Community Film Project: The Art of Neighborhoo...,Film & Video,Film & Video,USD,2015-08-29,19500.0,2015-07-04 08:35:03,1283.0,canceled,14,US,1283.0,1283.0,19500.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
378656,999976400,ChknTruk Nationwide Charity Drive 2014 (Canceled),Documentary,Film & Video,USD,2014-10-17,50000.0,2014-09-17 02:35:30,25.0,canceled,1,US,25.0,25.0,50000.00
378657,999977640,The Tribe,Narrative Film,Film & Video,USD,2011-07-19,1500.0,2011-06-22 03:35:14,155.0,failed,5,US,155.0,155.0,1500.00
378658,999986353,Walls of Remedy- New lesbian Romantic Comedy f...,Narrative Film,Film & Video,USD,2010-08-16,15000.0,2010-07-01 19:40:30,20.0,failed,1,US,20.0,20.0,15000.00
378659,999987933,BioDefense Education Kit,Technology,Technology,USD,2016-02-13,15000.0,2016-01-13 18:13:53,200.0,failed,6,US,200.0,200.0,15000.00


In [111]:
# 'ID' and 'name' are removed here and are not used as model inputs.
data = data.drop(columns=['ID', 'name'])

In [112]:
# Count the missing values in every column.
data.isna().sum()

category               0
main_category          0
currency               0
deadline               0
goal                   0
launched               0
pledged                0
state                  0
backers                0
country                0
usd pledged         3797
usd_pledged_real       0
usd_goal_real          0
dtype: int64

In [113]:
# Discard every row that contains a missing value, then renumber the index
# from 0. (Mean-imputing 'usd pledged' was the alternative considered; it is
# deliberately not applied.)
data = data.dropna(axis='index')
data = data.reset_index(drop=True)

In [114]:
# Binary target: 1 when the state is exactly 'successful', 0 for every other
# state value.
data['state'] = data['state'].eq('successful').astype('int64')

In [115]:
# Both timestamp columns are strings that start with 'YYYY-MM', so the year is
# characters 0-3 and the month is characters 5-6. The parts stay as strings.
launched = data['launched']
deadline = data['deadline']

data['YEAR launched'] = launched.str.slice(0, 4)
data['MONTH launched'] = launched.str.slice(5, 7)
data['YEAR deadline'] = deadline.str.slice(0, 4)
data['MONTH deadline'] = deadline.str.slice(5, 7)

# The raw timestamp columns are no longer needed once the parts are extracted.
data = data.drop(columns=['deadline', 'launched'])

In [116]:
# Nominal (unordered) columns and the prefix given to each column's indicator
# columns; the two lists are matched by position.
nominal_fets = ['YEAR launched','YEAR deadline','country','category','main_category','currency']
nominal_prefix = ['L','D','CO','CA','M','CU']

# One-hot encode all nominal columns in a single call. The encoded source
# columns are removed and their indicator columns are appended, in list order,
# after the remaining columns.
# dtype=float replaces the former `.astype(np.float)`: the `np.float` alias was
# deprecated in NumPy 1.20 and removed in 1.24, where it raises AttributeError.
data = pd.get_dummies(
    data,
    columns=nominal_fets,
    prefix=nominal_prefix,
    dtype=float,
)


Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations


Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations


Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations


Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations


Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations


Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations



In [117]:
# Separate the binary target from the feature columns.
# NOTE(review): the features still include 'pledged', 'backers', 'usd pledged'
# and 'usd_pledged_real'. These look like campaign outcomes rather than
# launch-time information and may leak the target -- confirm before trusting
# the reported metrics.
x = data.drop(columns='state')
y = data['state']

In [118]:
# Display the encoded target series (1 = successful, 0 = any other state).
y

0         0
1         0
2         0
3         0
4         0
         ..
374859    0
374860    0
374861    0
374862    0
374863    0
Name: state, Length: 374864, dtype: int64

In [119]:
# Standardise the features using statistics learned from the training rows
# only. Fitting on the full matrix would let the held-out rows influence the
# mean/variance used to scale the training data (test-set leakage).
#
# The training row positions are recovered by splitting a plain index array
# with the SAME train_size / random_state as the train_test_split cell that
# follows; with identical arguments and sample count the shuffle is identical.
# Keep the two calls in sync if either parameter changes.
x_values = np.asarray(x, dtype=float)
train_rows, _ = train_test_split(
    np.arange(len(x_values)),
    train_size=0.7,
    random_state=7,
)

scaler = StandardScaler()
scaler.fit(x_values[train_rows])

# Every row is transformed so the downstream split cell keeps working on `x`.
x = scaler.transform(x_values)

In [120]:
# Hold out 30% of the rows for final evaluation; the seed fixes the shuffle.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    train_size=0.7,
    random_state=7,
)

In [133]:
# Show the feature-matrix shape together with the share of positive targets.
# Only a cell's last expression is displayed, so the two values are returned
# as one tuple instead of silently discarding the first.
x.shape, y.mean()

(374864, 239)

In [132]:
# Balanced class weights for the training targets.
# np.unique returns the labels sorted, and each weight is paired with its own
# label. The previous version took labels in order of first appearance
# (Series.unique) and keyed the weights by position, which swaps the two
# weights whenever the first training row belongs to class 1.
class_labels = np.unique(y_train)
balanced_weights = class_weight.compute_class_weight(
    'balanced',
    classes=class_labels,
    y=y_train
)

# Keras expects {class label: weight} with plain Python scalars.
class_weights = dict(zip(class_labels.tolist(), balanced_weights.tolist()))

In [137]:
# Two-hidden-layer MLP with a sigmoid output for the binary target.
# The input width is read from the training matrix rather than hard-coded, so
# the cell keeps working if the feature set changes.
n_features = x_train.shape[1]

inputs = tf.keras.Input(shape=(n_features,))
# The intermediate tensor is named `hidden`, not `x`, so the notebook-level
# feature matrix `x` is not overwritten by a Keras tensor.
hidden = tf.keras.layers.Dense(256,activation='relu')(inputs)
hidden = tf.keras.layers.Dense(256,activation='relu')(hidden)
outputs = tf.keras.layers.Dense(1,activation='sigmoid')(hidden)

model = tf.keras.Model(inputs=inputs,outputs=outputs)

model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy',tf.keras.metrics.AUC(name='auc')]
)

# Train with class weighting; 20% of the training rows are held back for
# validation. Early stopping ends training after 5 epochs without a val_loss
# improvement and restores the best weights seen.
hist = model.fit(
    x_train,
    y_train,
    validation_split = 0.2,
    class_weight=class_weights,
    batch_size=40,
    epochs=100,
    # `callbacks` is documented as a list of callback instances.
    callbacks=[
        tf.keras.callbacks.EarlyStopping(
            monitor='val_loss',
            patience=5,
            restore_best_weights=True
        )
    ]
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100


In [139]:
# Training and validation loss per epoch, taken from the fit history.
fig = px.line(hist.history, y=['loss', 'val_loss'])
fig.show()

In [141]:
# Score the trained model on the held-out test rows.
model.evaluate(x=x_test, y=y_test)



[0.15692482888698578, 0.9393917918205261, 0.9837517142295837]

Dataset source (Kaggle, "Kickstarter Projects", file ks-projects-201801.csv): https://www.kaggle.com/datasets/kemical/kickstarter-projects?select=ks-projects-201801.csv

Direct download (dataset version 7): https://www.kaggle.com/datasets/kemical/kickstarter-projects/download?datasetVersionNumber=7