# Imports

In [1]:
! pip install fastai --quiet

[0m

In [2]:
import pandas as pd, numpy as np
from glob import glob
import time
import os
from IPython.display import display, HTML




from pandas.api.types import is_string_dtype, is_numeric_dtype, is_categorical_dtype
from fastai.tabular.all import *
from sklearn.ensemble import RandomForestRegressor
from sklearn.tree import DecisionTreeRegressor
from IPython.display import Image, display_svg, SVG
from fastai.imports import *

# pandas display: show every column and up to 150 rows when rendering frames.
# (Earlier max_rows=20 / max_columns=8 assignments were immediately overridden
# by these two calls, so only the effective settings are kept.)
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', 150)

# matplotlib defaults: larger font and figure; '#00000000' is an RGBA hex
# colour with alpha 00, i.e. a fully transparent figure background.
matplotlib.rcParams['font.size'] = 14
matplotlib.rcParams['figure.figsize'] = (10, 6)
matplotlib.rcParams['figure.facecolor'] = '#00000000'

# All Functions

In [3]:
def reward_levels_engineering(df):
    """Return a copy of ``df`` with a numeric ``level_0`` column added.

    The ``"reward levels"`` column holds comma-separated amounts such as
    ``"$5,$10,$25"``.  The ``$`` signs are stripped from that column and the
    first listed amount is stored as an integer in ``level_0``.

    Missing, empty or non-numeric first entries become ``0``; a fractional
    first entry is truncated to an integer.  The caller's frame is left
    untouched.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a string column named ``"reward levels"``.

    Returns
    -------
    pandas.DataFrame
        New frame with the cleaned ``"reward levels"`` column and an integer
        ``level_0`` column appended.
    """
    rewards = df["reward levels"].str.replace("$", "", regex=False)

    # Only the first comma-separated entry is used as a feature.
    first_level = rewards.str.split(",", expand=True)[0].str.strip()

    # errors="coerce" turns unparsable entries into NaN, which then become 0.
    level_0 = pd.to_numeric(first_level, errors="coerce").fillna(0).astype(int)

    # assign() builds a new frame, so the input is not mutated, and it does
    # not rely on index joins (which misbehave on duplicate index labels).
    return df.assign(**{"reward levels": rewards, "level_0": level_0})


def load_df_and_clean(df):
    """Filter the raw campaign frame and derive the date / reward features.

    Steps:
      1. keep rows with ``levels <= 11`` and ``comments <= 1000``;
      2. drop the ``'project id'`` column;
      3. parse ``'funded date'`` to datetime and add its ``year``;
      4. add the reward-level feature via ``reward_levels_engineering``.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw frame with at least the columns ``'project id'``, ``levels``,
        ``comments``, ``'funded date'`` and ``'reward levels'``.

    Returns
    -------
    pandas.DataFrame
        A new, cleaned frame; the input frame is not modified.
    """
    keep = (df["levels"] <= 11) & (df["comments"] <= 1000)

    # Explicit copy: new columns are assigned below, and doing that on a
    # boolean-filtered slice triggers pandas' SettingWithCopyWarning.
    cleaned = df.loc[keep].drop(columns=['project id']).copy()

    cleaned['funded date'] = pd.to_datetime(cleaned['funded date'])
    cleaned['year'] = cleaned['funded date'].dt.year

    # apply necessary feature engineering for the reward levels
    return reward_levels_engineering(cleaned)

def log_transform(df,col):
    """Replace ``df[col]`` in place with ``log1p`` of its values.

    The frame is modified directly and nothing is returned.
    """
    logged = np.log1p(df[col])
    df[col] = logged
    
def set_range_col(df,col,l,r):
    """Keep only the rows whose ``df[col]`` lies in the closed range ``[l, r]``.

    Parameters
    ----------
    df : pandas.DataFrame
    col : str
        Column to filter on.
    l, r : number
        Inclusive lower and upper bounds.

    Returns
    -------
    pandas.DataFrame
        An independent copy of the matching rows.  Callers go on to modify the
        result in place (log transforms, outlier capping); returning a copy
        rather than a slice keeps those writes from raising
        SettingWithCopyWarning or touching the source frame.
    """
    in_range = df[col].between(l, r)  # inclusive on both ends by default
    return df[in_range].copy()

def deal_with_outliers(df,col):
    """Cap the upper tail of ``df[col]`` in place using the 1.5 * IQR rule.

    The cap is ``ceil(Q3 + 1.5 * IQR)``; every value above it is replaced by
    the cap.  Low values are left untouched.  The frame is modified directly
    and nothing is returned.

    Note: the quartiles are computed from the frame passed in, so on a
    single-row frame the cap is ``ceil`` of that row's own value and nothing
    is changed.
    """
    q1 = df[col].quantile(0.25)  # 25th percentile
    q3 = df[col].quantile(0.75)  # 75th percentile
    iqr = q3 - q1                # interquartile range

    upper_bound = np.ceil(q3 + 1.5 * iqr)

    df.loc[df[col] > upper_bound, col] = upper_bound

def drop_columns(df, cols=None):
    """Return ``df`` without the columns named in ``cols``.

    Parameters
    ----------
    df : pandas.DataFrame
    cols : str, iterable of str, or None
        Column name(s) to drop.  ``None`` or an empty collection returns
        ``df`` itself, unchanged.

    Returns
    -------
    pandas.DataFrame
        ``df`` itself when there is nothing to drop, otherwise a new frame.

    Raises
    ------
    KeyError
        If a requested column does not exist (raised by ``DataFrame.drop``).
    """
    if cols is None:
        return df

    # Normalise to a list so the emptiness check also works for pd.Index and
    # ndarray inputs, whose truth value is ambiguous.
    if isinstance(cols, str):
        names = [cols] if cols else []
    else:
        names = list(cols)

    if not names:
        return df
    return df.drop(columns=names)

def all_data_preprocessing(df):
    """Run the complete cleaning and scaling pipeline on a campaign frame.

    Order of operations:
      * clean the frame and derive features (``load_df_and_clean``);
      * log1p the target ``pledged`` and keep rows whose value is in [2, 11];
      * log1p ``goal``;
      * for ``backers``, ``comments`` and ``level_0``: cap upper outliers,
        then log1p;
      * drop the raw ``'reward levels'`` column.

    Returns the processed frame.
    """
    df = load_df_and_clean(df)

    # Target first: the range filter is expressed on the log1p scale.
    log_transform(df, 'pledged')
    df = set_range_col(df, 'pledged', 2, 11)

    log_transform(df, 'goal')

    # Capped features: outliers are capped before the log transform.
    # TODO: confirm whether `level_0` should be log-transformed at all.
    for capped_col in ('backers', 'comments', 'level_0'):
        deal_with_outliers(df, capped_col)
        log_transform(df, capped_col)

    return drop_columns(df, cols=['reward levels'])

def train_test_split(df, test_frac=0.05):
    """Split ``df`` positionally into a leading train part and a trailing test part.

    No shuffling is done: the last ``int(len(df) * test_frac)`` rows form the
    test set and everything before them is the training set.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to split (anything with ``.shape`` and row slicing works).
    test_frac : float, default 0.05
        Fraction of rows, between 0 and 1, placed in the test set.

    Returns
    -------
    (train, test) : tuple
        Two row slices of ``df``.

    Raises
    ------
    ValueError
        If ``test_frac`` is outside ``[0, 1]``.
    """
    if not 0 <= test_frac <= 1:
        raise ValueError(f"test_frac must be between 0 and 1, got {test_frac!r}")

    n_rows = df.shape[0]
    n_test = int(n_rows * test_frac)
    split_at = n_rows - n_test

    return df[:split_at], df[split_at:]

# Writing the loss function from scratch is a good way to see how the math
# maps directly onto the code used in this project,
# so let's implement the RMSE ourselves.

import math
def r_mse(pred, y):
    """Root-mean-squared error between ``pred`` and ``y``, rounded to 6 decimals.

    ``pred - y`` must support ``** 2`` and ``.mean()`` (e.g. numpy arrays or
    pandas Series).
    """
    squared_error = (pred - y) ** 2
    return round(math.sqrt(squared_error.mean()), 6)
def m_rmse(m,xs,y):
    """RMSE of model ``m`` on features ``xs`` against targets ``y``.

    ``m`` only needs a ``predict(xs)`` method.
    """
    predictions = m.predict(xs)
    return r_mse(predictions, y)


def one_prediction(df):
    """Turn a preprocessed frame into the feature matrix the models expect.

    Relies on two notebook globals that must already exist when this is
    called: ``to`` (the object loaded from ``to_event.pkl``) and ``xs`` (the
    training feature frame loaded from ``xs_final.pkl``).

    Returns the feature frame for ``df``, restricted to and ordered by the
    columns of ``xs``.
    """
    # NOTE(review): presumably test_dl applies the preprocessing stored in
    # `to` to the new rows - confirm against the fastai docs.
    dl_test = to.dataloaders().test_dl(df)
    # y_test is read but not used below.
    xs_test, y_test = dl_test.train.xs, dl_test.train.y
    # Select exactly the training columns, in the training order.
    xs_test = xs_test[xs.columns.to_list()]
    return xs_test

def final_one_prediction(df,rf,xgbR):
    """Blend the two models' predictions for ``df``.

    The features come from ``one_prediction(df)``; the result is the
    element-wise weighted sum ``0.1 * rf + 0.89999999 * xgbR``.  The models
    are fed log1p-scaled targets elsewhere in this notebook, so the returned
    values are on that scale as well.
    """
    features = one_prediction(df)

    # Each model's contribution, already multiplied by its blend weight.
    rf_part = rf.predict(features) * 0.1
    xgb_part = xgbR.predict(features) * 0.89999999

    return np.sum([rf_part, xgb_part], axis=0)


# Real Talk

## Load and clean data

In [11]:
# Read the raw campaign export (bytes that cannot be decoded are ignored) and
# run the whole preprocessing pipeline on it in one step.
df = all_data_preprocessing(
    pd.read_csv('event_camp.csv', encoding_errors='ignore')
)

## Train / Test Split

In [5]:
# Positional split with no shuffling: df1 gets the leading rows of `df`,
# df_test the trailing rows.
df1, df_test = train_test_split(df)

In [6]:
# Load artifacts saved by an earlier run; the files must sit next to the
# notebook.
# NOTE(review): these are pickle files - unpickling can execute arbitrary
# code, so only load files you produced yourself.
to = load_pickle('./to_event.pkl')
# Training / validation targets taken from the loaded object.
y = to.train.y
valid_y = to.valid.y
# Feature frames; `xs` supplies the column list used by one_prediction().
xs = load_pickle('./xs_final.pkl')
valid_xs = load_pickle('./valid_xs_final.pkl')

# GUI

In [7]:
# Distinct values of each categorical column in the training split, in order
# of first appearance; they feed the dropdown widgets of the form.
categories, sub_cat, loc, stat = (
    list(df1[column].unique())
    for column in ('category', 'subcategory', 'location', 'status')
)



In [8]:
import ipywidgets as widgets
from IPython.display import display, clear_output

# Create input fields
# Each widget below is read by save_changes() through its `.value`, so the
# variable names must stay in sync with that function.

name = widgets.Text(value="Point of Descent", description="Name:")

# Dropdown defaults are picked by position in the option lists, so they depend
# on the row order of the training data (and on sub_cat having >= 19 entries).
category = widgets.Dropdown(value=categories[1], options=categories ,description="Category:")

subcategory = widgets.Dropdown(value=sub_cat[18], options =sub_cat, description="subcategory:")


goal = widgets.FloatText(value=2500.0, description="Goal:")

location = widgets.Dropdown(value=loc[0],options=loc, description="location:")

status = widgets.Dropdown(value=stat[0],options=stat, description="status:")

funded_percentage = widgets.FloatText(value=1.038, description="funded percentage:")

# Counts are entered through FloatText, so their values come back as floats.
backers = widgets.FloatText(value=21, description="backers:")


levels = widgets.FloatText(value=8, description="levels:")

updates = widgets.FloatText(value=6, description="updates:")

comments = widgets.FloatText(value=3, description="comments:")

duration = widgets.FloatText(value=91.0, description="duration:")


# Display widgets
display(name, category,subcategory, goal,location, status, funded_percentage, backers, levels, updates, comments, duration)

# Output widget for displaying updates
# NOTE(review): nothing visible in this notebook writes to `output` yet.
output = widgets.Output()
display(output)

# Filled in by save_changes(); stays empty until "Save Changes" is clicked.
data = {}

# Function to save values
def save_changes(b):
    """Button callback: snapshot the current widget values into the global ``data``.

    Parameters
    ----------
    b : ipywidgets.Button
        The button that was clicked (unused; required by ``on_click``).

    The resulting dict has the same keys as one row of the raw campaign data.
    Fields without a widget (``project id``, ``url``, ``pledged``,
    ``funded date`` and ``reward levels``) are fixed placeholder values.
    """
    # Rebinding a module-level name from inside a function needs `global`;
    # otherwise `data` would be a new local variable.
    global data
    data = {
        "project id": 2623100,
        "name": name.value,
        "url": "http://www.kickstarter.com/projects/bmag-games/point-of-descent",
        "category": category.value,
        "subcategory": subcategory.value,
        "location": location.value,
        "status": status.value,
        "goal": goal.value,
        "pledged": 2595.0,
        "funded percentage": funded_percentage.value,
        "backers": backers.value,
        "funded date": "Sat, 25 Jun 2011 15:54:12 -0000",
        "levels": levels.value,
        # This key used to appear twice in the literal; only the later,
        # "$"-prefixed value ever took effect, so that is the one kept.
        "reward levels": "$5,$10,$25,$25,$100,$100,$500,$500",
        "updates": updates.value,
        "comments": comments.value,
        "duration": duration.value,
    }

# Save button
save_button = widgets.Button(description="Save Changes", button_style='success')
save_button.on_click(save_changes)
display(save_button)

Text(value='Point of Descent', description='Name:')

Dropdown(description='Category:', index=1, options=('Film & Video', 'Games', 'Fashion', 'Art', 'Technology', '…

Dropdown(description='subcategory:', index=18, options=('Short Film', 'Board & Card Games', 'Animation', 'Fash…

FloatText(value=2500.0, description='Goal:')

Dropdown(description='location:', options=('Columbia, MO', 'Maplewood, NJ', 'Los Angeles, CA', 'Portland, OR',…

Dropdown(description='status:', options=('successful', 'failed', 'live', 'canceled', 'suspended'), value='succ…

FloatText(value=1.038, description='funded percentage:')

FloatText(value=21.0, description='backers:')

FloatText(value=8.0, description='levels:')

FloatText(value=6.0, description='updates:')

FloatText(value=3.0, description='comments:')

FloatText(value=91.0, description='duration:')

Output()

Button(button_style='success', description='Save Changes', style=ButtonStyle())

In [13]:
data

{'project id': 2623100,
 'name': 'Point of Descent',
 'url': 'http://www.kickstarter.com/projects/bmag-games/point-of-descent',
 'category': 'Games',
 'subcategory': 'Video Games',
 'location': 'Columbia, MO',
 'status': 'successful',
 'goal': 2500.0,
 'pledged': 2595.0,
 'funded percentage': 1.038,
 'backers': 21.0,
 'funded date': 'Sat, 25 Jun 2011 15:54:12 -0000',
 'levels': 8.0,
 'reward levels': '$5,$10,$25,$25,$100,$100,$500,$500',
 'updates': 6.0,
 'comments': 3.0,
 'duration': 91.0}

In [14]:
import joblib
# Load the saved models: a dict with the entries 'rf' and 'xgb'.
# NOTE(review): joblib files are pickle-based - only load files you trust.
learners = joblib.load("event_pledged.joblib")
rf = learners['rf']
xgbR = learners['xgb']

In [15]:
# `data` is only filled in by the "Save Changes" button; fail with a clear
# message instead of a KeyError deep inside the preprocessing.
if not data:
    raise RuntimeError('No saved input: click "Save Changes" in the form above first.')

# Keep the single-row frame under its own name so the preprocessed training
# frame `df` is not overwritten by this cell.
input_df = all_data_preprocessing(pd.DataFrame([data]))
if input_df.empty:
    raise ValueError("The entered project was filtered out by the preprocessing "
                     "(levels, comments or pledged outside the supported range).")

pred = final_one_prediction(input_df, rf, xgbR)

# The target was transformed with log1p, so expm1 is the exact inverse
# (plain exp would overstate the amount by 1).
predicted_pledge = np.expm1(pred[0])

# Create HTML with styling for a card
html_content = f"""
<div style="border: 2px solid #4CAF50; border-radius: 10px; padding: 20px; width: 300px; 
            background-color: #f0f8ff; box-shadow: 0px 4px 8px rgba(0, 0, 0, 0.1);">
    <h2 style="color: green; text-align: center;">Predicted Pledge: <strong>${predicted_pledge:.2f}</strong></h2>
</div>
"""

# Display the card
display(HTML(html_content))

In [16]:
# Error of this one prediction on the log1p scale; 2595.0 is the `pledged`
# value hard-coded in save_changes().
r_mse(pred, np.log1p(2595.0))

0.004728

# Accuracy ? 

# Interpretation of RMSLE Error (0.004728)

## 1. Magnitude of Error (0.004728)
- The error is computed on the `log1p`-transformed actual and predicted targets, so a value of **0.004728** is **very low**. Note that it is measured on a single example (the project entered in the form above), not on a held-out test set.
- This means this prediction is **very close** to the actual value in a relative sense.

## 2. Effect in Real Scale
- Because the error is in the log-transformed scale, we can approximate its real-world effect:
- If $\text{RMSLE} = 0.004728$, then the predicted value deviates by approximately:

  $$ e^{0.004728} \approx 1.00474 $$

  times from the actual value.
- This means that, for this example, the prediction differs by only about **0.474%** from the actual value.

# How to Interpret in Real-World Predictions

- If the actual value is **$2595**, then the predicted value is expected to be within:

  $$ 2595 \times (1 \pm 0.00474) = [2582.7, 2607.3] $$

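- This means the model’s prediction is very close to the actual value for this example. A single example cannot show how the model behaves on average; that needs the error on the held-out test split. Also note that `funded percentage × goal` (1.038 × 2500 = 2595) reproduces `pledged` exactly, so if `funded percentage` is among the model's features, part of this accuracy comes from the target leaking into the inputs.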

# Resources

- ChatGPT
- fast.ai
- scikit-learn



## Previous project
[AI-POWERED WEB APP](https://github.com/Ali-Alshaikh/AI-Powered-WebApp/blob/master/ai/house_price_prediction.ipynb)