# PART 2: TABULAR - BOOK SALES PREDICTOR

In [1]:
from fastai.tabular.all import *
import pandas as pd

In [6]:
# Hand-made sample of eight books. Sales_Rank is the value the model will
# learn to predict; the other four columns are its inputs.
tabular_data = dict(
    Genre=['Fantasy', 'Sci-Fi', 'Thriller', 'Fantasy', 'Sci-Fi', 'Thriller', 'Fantasy', 'Sci-Fi'],
    Avg_Rating=[4.8, 4.5, 4.2, 4.9, 4.6, 4.3, 4.7, 4.8],
    Page_Count=[870, 450, 320, 1178, 510, 280, 600, 374],
    Author_Famous=[True, False, True, True, True, False, False, True],
    Sales_Rank=[1, 15, 25, 3, 10, 35, 12, 5],  # This is what we want to predict
)

# Pin every column to an explicit dtype rather than relying on inference.
tabular_df = pd.DataFrame(tabular_data).astype(
    dict(
        Genre='category',
        Author_Famous='bool',
        Avg_Rating='float',
        Page_Count='int',
        Sales_Rank='int',
    )
)
tabular_df.head()  # last expression of the cell, so the notebook renders it

Unnamed: 0,Genre,Avg_Rating,Page_Count,Author_Famous,Sales_Rank
0,Fantasy,4.8,870,True,1
1,Sci-Fi,4.5,450,False,15
2,Thriller,4.2,320,True,25
3,Fantasy,4.9,1178,True,3
4,Sci-Fi,4.6,510,True,10


In [3]:
# Column whose value the model is trained to predict.
y_names = 'Sales_Rank'

# Input columns, grouped by whether they are passed as continuous or
# categorical variables to the data loaders.
cont_names = ['Avg_Rating', 'Page_Count']
cat_names = ['Genre', 'Author_Famous']

# Preprocessing steps handed to the data loaders, in this order.
procs = [Categorify, FillMissing, Normalize]

In [8]:
# Build the training/validation DataLoaders directly from the DataFrame.
#
# NOTE: `TabularDataLoaders.from_df` does not accept `valid_pct`; an extra
# keyword like that is silently ignored. Unless `valid_idx` is passed, the
# split is random with fastai's default 20% validation share, which is
# 1 of the 8 rows here. Pass `valid_idx=[...]` to choose the validation rows.
dls_tab = TabularDataLoaders.from_df(
    tabular_df,
    procs=procs,
    cat_names=cat_names,
    cont_names=cont_names,
    y_names=y_names,
    y_block=RegressionBlock,  # Specify we are predicting a number
    bs=4,  # batch size; show_batch displays one batch of this many rows
)

In [10]:
# Display one batch of rows from the data loaders as a sanity check.
dls_tab.show_batch()

Unnamed: 0,Genre,Author_Famous,Avg_Rating,Page_Count,Sales_Rank
0,Fantasy,True,4.9,1177.999975,3.0
1,Thriller,False,4.3,280.000008,35.0
2,Sci-Fi,True,4.8,373.999994,5.0
3,Thriller,True,4.2,320.000004,25.0


In [11]:
# Tabular model over the book data loaders, with RMSE tracked as a metric.
learn_tab = tabular_learner(dls_tab, metrics=rmse)

# Train for three epochs using the one-cycle schedule.
learn_tab.fit_one_cycle(n_epoch=3)

epoch,train_loss,valid_loss,_rmse,time
0,398.592834,0.881139,0.93869,00:00
1,309.445892,0.894184,0.945613,00:00
2,377.122314,0.877718,0.936866,00:00


In [13]:

# Score one known book (row 0) and compare the prediction with its true rank.
single_row_df = tabular_df.iloc[[0]]  # double brackets keep a one-row DataFrame

# Wrap the row in a test DataLoader built from the existing data loaders.
single_row_dl = dls_tab.test_dl(single_row_df)

# With with_decoded=True, get_preds returns three items: predictions,
# targets, and decoded predictions. The third item is still the model's
# output (after the loss function's decoding step), not the input row, so it
# is labelled as a prediction below.
preds, _, dec_preds = learn_tab.get_preds(dl=single_row_dl, with_decoded=True)

print("Decoded Prediction:")
print(dec_preds[0])

print(f"\nPredicted Sales Rank: {preds.squeeze().item():.2f}")
print(f"Actual Sales Rank: {tabular_df.iloc[0]['Sales_Rank']}")

Decoded Input:
tensor([0.0631])

Predicted Sales Rank: 0.06
Actual Sales Rank: 1
