In [1]:
import tensorflow as tf
import numpy as np
import pandas as pd
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.layers import IntegerLookup
from tensorflow.keras.layers import Normalization
from tensorflow.keras.layers import StringLookup

In [2]:
print(tf.__version__)

2.7.0


In [60]:
# def wrangle(CSV_Path):
#     df = pd.read_csv(CSV_Path,compression = "zip").drop(columns=['Unnamed: 0'])

#     # get string lenght from 'description' and 'project_name' columns
#     df['description'] = [len(desc) for desc in df['description']]
#     df['project_name'] = [len(title) for title in df['project_name']]

#     df['state'] = [int(x) for x in df['state'].isin(['successful'])]

#     # Reorder Columns & rename
#     df = df[['state','country', 'city_name', 'category', 'currency', 'launch_month_number_of_the_year'
#              ,'goal','campaign_duration_in_days', 'description', 'project_name']]
#     df.columns = ['state','country', 'city_name', 'category', 'currency', 'launch_month'
#                  ,'goal','days_of_campaign', 'description', 'project_name']
    
#     df['category'] = df['category'].str.lower()
#     df['city_name'] = df['city_name'].str.lower()
    
#     fail_cats = list(set(df['category'][df['state']==0]))
#     successful_cats = list(set(df['category'][df['state']==1]))
    
#     just_successful_cats = [cat for cat in successful_cats if cat not in fail_cats]
    
#     df = df[~df['category'].isin(just_successful_cats)]
    
#     return df


In [69]:
# df = wrangle('Kick(2021-2022)_wrangled2.zip')

In [75]:
df = pd.read_csv('clean_data.zip',compression = "zip").drop(columns=['Unnamed: 0'])

In [76]:
df.head()

Unnamed: 0,state,country,city_name,category,currency,launch_month,goal,days_of_campaign,description,project_name
0,0,US,burlington,glass,USD,8,2000.0,30,96,50
1,1,US,vineyard,events,USD,7,3000.0,16,133,26
2,1,BE,antwerp,comedy,EUR,10,300.0,60,134,26
3,1,US,new york,musical,USD,7,10000.0,31,92,41
4,1,CA,calgary,fine art,CAD,11,800.0,6,130,50


In [77]:
df.shape

(782971, 10)

In [96]:
val_df = df.sample(frac=0.2, random_state=42)
train_df = df.drop(val_df.index)

print(f"Using {len(train_df)} samples for training and {len(val_df)} for validation")

Using 166572 samples for training and 41643 for validation


In [97]:
def dataframe_to_dataset(dataframe):
    dataframe = dataframe.copy()
    labels = dataframe.pop("state")
    ds = tf.data.Dataset.from_tensor_slices((dict(dataframe), labels))
    ds = ds.shuffle(buffer_size=len(dataframe))
    return ds

train_ds = dataframe_to_dataset(train_df)
val_ds = dataframe_to_dataset(val_df)

In [99]:
for x, y in train_ds.take(1):
    print("Input:", x)
    print("Target:", y)

Input: {'country': <tf.Tensor: shape=(), dtype=string, numpy=b'US'>, 'city_name': <tf.Tensor: shape=(), dtype=string, numpy=b'Seattle'>, 'category': <tf.Tensor: shape=(), dtype=string, numpy=b'Mixed Media'>, 'currency': <tf.Tensor: shape=(), dtype=string, numpy=b'USD'>, 'launch_month': <tf.Tensor: shape=(), dtype=int64, numpy=5>, 'goal': <tf.Tensor: shape=(), dtype=float64, numpy=800.0>, 'days_of_campaign': <tf.Tensor: shape=(), dtype=int64, numpy=18>, 'description': <tf.Tensor: shape=(), dtype=int64, numpy=124>, 'project_name': <tf.Tensor: shape=(), dtype=int64, numpy=49>}
Target: tf.Tensor(1, shape=(), dtype=int64)


In [100]:
train_ds = train_ds.batch(32)
val_ds = val_ds.batch(32)

In [101]:
def encode_numerical_feature(feature, name, dataset):
    # Create a Normalization layer for our feature
    normalizer = Normalization()

    # Prepare a Dataset that only yields our feature
    feature_ds = dataset.map(lambda x, y: x[name])
    feature_ds = feature_ds.map(lambda x: tf.expand_dims(x, -1))

    # Learn the statistics of the data
    normalizer.adapt(feature_ds)

    # Normalize the input feature
    encoded_feature = normalizer(feature)
    return encoded_feature

def encode_categorical_feature(feature, name, dataset, is_string):
    lookup_class = StringLookup if is_string else IntegerLookup
    # Create a lookup layer which will turn strings into integer indices
    lookup = lookup_class(output_mode="binary")

    # Prepare a Dataset that only yields our feature
    feature_ds = dataset.map(lambda x, y: x[name])
    feature_ds = feature_ds.map(lambda x: tf.expand_dims(x, -1))

    # Learn the set of possible string values and assign them a fixed integer index
    lookup.adapt(feature_ds)

    # Turn the string input into integer indices
    encoded_feature = lookup(feature)
    return encoded_feature

In [102]:
# Categorical features encoded as strings
country = keras.Input(shape=(1,), name="country", dtype="string")
city_name = keras.Input(shape=(1,), name="city_name", dtype="string")
category = keras.Input(shape=(1,), name="category", dtype="string")
currency = keras.Input(shape=(1,), name="currency", dtype="string")

# Categorical features encoded as integers
launch_month = keras.Input(shape=(1,), name="launch_month", dtype="int64")

# Numerical features
goal = keras.Input(shape=(1,), name="goal")
days_of_campaign = keras.Input(shape=(1,), name="days_of_campaign")
description = keras.Input(shape=(1,), name="description")
project_name = keras.Input(shape=(1,), name="project_name")


all_inputs = [country, city_name, category, currency, launch_month,
               goal, days_of_campaign, description, project_name]

In [103]:
# String categorical features
country_encoded = encode_categorical_feature(country, "country", train_ds, True)
city_name_encoded = encode_categorical_feature(city_name, "city_name", train_ds, True)
category_encoded = encode_categorical_feature(category, "category", train_ds, True)
currency_encoded = encode_categorical_feature(currency, "currency", train_ds, True)

# Integer categorical features
launch_month_encoded = encode_categorical_feature(launch_month, "launch_month", train_ds, False)

# Numerical features
goal_encoded = encode_numerical_feature(goal, "goal", train_ds)
days_of_campaign_encoded = encode_numerical_feature(days_of_campaign, "days_of_campaign", train_ds)
description_encoded = encode_numerical_feature(description, "description", train_ds)
project_name_encoded = encode_numerical_feature(project_name, "project_name", train_ds)


all_features = layers.concatenate([country_encoded, city_name_encoded, category_encoded, currency_encoded,
               launch_month_encoded, goal_encoded, days_of_campaign_encoded, description_encoded,
               project_name_encoded])

In [104]:
x = layers.Dense(50, activation="relu")(all_features)
x = layers.Dropout(0.3)(x)
output = layers.Dense(1, activation="sigmoid")(x)
model = keras.Model(all_inputs, output)
model.compile("adam", "binary_crossentropy", metrics=["accuracy"])

In [46]:
# # `rankdir='LR'` is to make the graph horizontal.
# keras.utils.plot_model(model, show_shapes=True, rankdir="LR")

In [63]:
model.fit(train_ds, epochs=10, validation_data=val_ds)

Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40
Epoch 28/40
Epoch 29/40
Epoch 30/40
Epoch 31/40
Epoch 32/40
Epoch 33/40
Epoch 34/40
Epoch 35/40
Epoch 36/40
Epoch 37/40
Epoch 38/40
Epoch 39/40
Epoch 40/40


<keras.callbacks.History at 0x157848fc8e0>

In [90]:

sample = {
    "country": 'US',
    "city_name": 'Burlington',
    "category": 'Glass',
    "currency": 'USD',
    "launch_month": 8,
    "goal": 2000,
    "days_of_campaign": 30,
    "description": 96,
    "project_name": 50
}

input_dict = {name: tf.convert_to_tensor([value]) for name, value in sample.items()}
predictions = model.predict(input_dict)

print(
    f"This particular Kickstarter campaign has a {round(100 * predictions[0][0],2)}% probability "
    "of been succssesful, as evaluated by our model."
)

This particular Kickstarter campaign has a 66.16% probability of been succssesful, as evaluated by our model.


In [91]:
# model.save('TrainedModel1')