# Code pipeline

In [1]:
# !pip install tai-chi-engine

## Probable other dependencies
transformers==4.12.3

In [3]:
from forgebox.imports import *
from tai_chi_engine import TaiChiEngine

## Demo tasks

> Load all the code above in one shot, the demo starts here

In [8]:
from tai_chi_engine.utils import df_creator_image_folder

### Choose dataset

In [5]:
# Root folder that holds every demo dataset.
# (Alternative bear location: HOME/"Downloads"/"bear_dataset")
DATA = Path("/GCI/data")

# One sub-folder per demo task.
BEAR_DATASET = Path(DATA, "bear_dataset")
ROTTEN_TOMATOES = Path(DATA, "rttmt")
NETFLIX = Path(DATA, "nf")

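Choose **one** of the following datasets to run — each cell below assigns `base_df`, so only the most recently executed one takes effect.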

#### Netflix 📺

In [10]:
# Load the Netflix titles table and preview its first rows.
base_df = pd.read_csv(NETFLIX/"netflix_titles.csv")
base_df.head()

Unnamed: 0,show_id,type,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description
0,s1,Movie,Dick Johnson Is Dead,Kirsten Johnson,,United States,"September 25, 2021",2020,PG-13,90 min,Documentaries,"As her father nears the end of his life, filmm..."
1,s2,TV Show,Blood & Water,,"Ama Qamata, Khosi Ngema, Gail Mabalane, Thaban...",South Africa,"September 24, 2021",2021,TV-MA,2 Seasons,"International TV Shows, TV Dramas, TV Mysteries","After crossing paths at a party, a Cape Town t..."
2,s3,TV Show,Ganglands,Julien Leclercq,"Sami Bouajila, Tracy Gotoas, Samuel Jouy, Nabi...",,"September 24, 2021",2021,TV-MA,1 Season,"Crime TV Shows, International TV Shows, TV Act...",To protect his family from a powerful drug lor...
3,s4,TV Show,Jailbirds New Orleans,,,,"September 24, 2021",2021,TV-MA,1 Season,"Docuseries, Reality TV","Feuds, flirtations and toilet talk go down amo..."
4,s5,TV Show,Kota Factory,,"Mayur More, Jitendra Kumar, Ranjan Raj, Alam K...",India,"September 24, 2021",2021,TV-MA,2 Seasons,"International TV Shows, Romantic TV Shows, TV ...",In a city of coaching centers known to train I...


#### The bear 🐻

In [9]:
# Build the DataFrame from the bear image folder; the preview shows a single
# `path` column with one image file per row.
base_df = df_creator_image_folder(BEAR_DATASET)
base_df.head()

Unnamed: 0,path
0,/GCI/data/bear_dataset/black/00000090.jpg
1,/GCI/data/bear_dataset/black/00000189.jpg
2,/GCI/data/bear_dataset/black/00000051.jpg
3,/GCI/data/bear_dataset/teddys/00000172.jpg
4,/GCI/data/bear_dataset/grizzly/00000108.jpg


#### The rotten tomatoes 🍅 🎬

In [6]:
# The Rotten Tomatoes dataset; only the first 50,000 rows are read.

def _parse_fraction_score(raw):
    """Turn a "numerator/denominator" string such as "3.5/5" into a float.

    Returns NaN when the text is not a plain numeric fraction or when the
    denominator is zero, so malformed rows can be dropped instead of
    crashing the cell.
    """
    try:
        numerator, denominator = (float(part) for part in raw.split("/"))
        return numerator / denominator
    except (ValueError, ZeroDivisionError):
        return float("nan")


base_df = pd.read_csv(ROTTEN_TOMATOES/'critic_reviews.csv', nrows=50000)

# Keep only rows that have a score, a review text and a critic name.
base_df = base_df.dropna(subset=['review_score', 'review_content', 'critic_name'])

# Keep only fraction-style scores (those containing "/"); everything else is discarded.
# na=False treats any non-string entry as "no slash" instead of raising.
base_df = base_df[
    base_df['review_score'].str.contains("/", regex=False, na=False)
].reset_index(drop=True)

# Parse the fraction arithmetically instead of with eval(): the strings come
# from an external CSV, and eval() would execute whatever expression they hold.
base_df['review_score'] = base_df['review_score'].apply(_parse_fraction_score)

# Drop the rows whose score could not be parsed.
base_df = base_df.dropna(subset=['review_score']).reset_index(drop=True)

base_df.head()

Unnamed: 0,rotten_tomatoes_link,critic_name,top_critic,publisher_name,review_type,review_score,review_date,review_content
0,m/0814255,Ben McEachen,False,Sunday Mail (Australia),Fresh,0.7,2010-02-09,Whether audiences will get behind The Lightnin...
1,m/0814255,Nick Schager,False,Slant Magazine,Rotten,0.25,2010-02-10,Harry Potter knockoffs don't come more transpa...
2,m/0814255,Bill Goodykoontz,True,Arizona Republic,Fresh,0.7,2010-02-10,"Percy Jackson isn't a great movie, but it's a ..."
3,m/0814255,Jim Schembri,True,The Age (Australia),Fresh,0.6,2010-02-10,"Crammed with dragons, set-destroying fights an..."
4,m/0814255,Mark Adams,False,Daily Mirror (UK),Fresh,0.8,2010-02-10,"This action-packed fantasy adventure, based on..."


### Start of the pipeline

Initiate the ```phase``` to track the configuration

In [5]:
# Project folder handed to TaiChiEngine / PhaseConfig.load in the following cells.
# Keep exactly one assignment active; the commented lines are alternative project folders.
# PROJECT = Path("./project")
# PROJECT = Path("./project/image_regression")
# PROJECT = Path("./project/rotten1")
# PROJECT = Path("./project/rotten_text")
PROJECT = Path("./netflix")
# PROJECT = Path("./project0")
# PROJECT = Path("./playground")


In [None]:
from tai_chi_engine import TaiChiEngine
# Create the engine for the chosen DataFrame and project folder.
engine = TaiChiEngine(base_df, project=PROJECT)
# Calling the engine starts it; presumably this opens the interactive,
# step-by-step configuration UI — TODO confirm.
engine()

## Non-interactive, all-code pipeline

In [8]:
from tai_chi_tuna.config import PhaseConfig
from tai_chi_tuna.flow.to_enrich import set_enrich, execute_enrich
from tai_chi_tuna.flow.to_quantify import (
    execute_quantify, TaiChiDataset, choose_xy, 
    save_qdict, load_qdict
    )
from tai_chi_tuna.flow.to_model import TaiChiDataModule, assemble_model
from tai_chi_tuna.flow.to_train import (
    make_slug_name, set_trainer, run_training)

In [9]:
from tai_chi_engine import TaiChiEngine
from forgebox.html import list_group_kv

In [10]:
# Load the phase configuration stored for PROJECT
# (presumably written by an earlier interactive run — TODO confirm).
phase = PhaseConfig.load(PROJECT)



Execute the enrichment and create the dataset

In [11]:
# Run the enrichments configured in `phase` on the DataFrame, using the
# engine's enrichment registry.
# NOTE(review): base_df is overwritten here, so re-running this cell feeds the
# already-enriched frame back in — confirm that is harmless.
base_df = execute_enrich(base_df, phase, enrichments=TaiChiEngine.enrichments_map)
# Wrap the enriched frame in a TaiChiDataset.
ds = TaiChiDataset(base_df)

Define the data transformation (into tensor), and create the **datamodule**

In [12]:
# Build the quantify objects from the DataFrame and the phase configuration,
# using the engine's quantify registry.
qdict = execute_quantify(df=base_df, phase=phase, quantify_map=TaiChiEngine.quantify_map)
# save quantify objects
_ = save_qdict(phase.project, qdict)

# Combine the dataset and quantify objects into the datamodule, then apply
# the settings stored under phase['batch_level'] as keyword arguments.
datamodule = TaiChiDataModule(ds, qdict)
datamodule.configure(**phase['batch_level'])

0it [00:00, ?it/s]

HBox(children=(HTML(value="<div class='alert alert-info' role='alert'>\n        <strong>Alert!</strong>  Loadi…

Assemble the **model**

In [None]:
# Registry of entry and exit modules, taken from the engine class.
module_zoo = {"all_entry": TaiChiEngine.all_entry, "all_exit": TaiChiEngine.all_exit}
# Assemble the model from the phase config, the quantify objects and the registry.
final_model = assemble_model(phase, qdict, module_zoo)

Save the configuration so far

In [14]:
# Derive a slug name from the phase and store it in the configuration.
phase['task_slug'] = make_slug_name(phase)
# Save the configuration.
phase.save()

Run the **Training**

In [None]:
# run_training(...) returns a callable, which is invoked here with an empty dict.
# NOTE(review): what that dict argument is expected to contain is not visible here — confirm.
run_training(phase, final_model, datamodule)(dict())

## Inference Pipeline

### Bear inference

In [2]:
from tai_chi_engine import TaiChiTrained
from pathlib import Path

In [16]:
# Load the trained bear project from its folder.
PROJECT = Path("./project")
trained = TaiChiTrained(PROJECT)
# Echo the object to show its summary (project name, x/y columns).
trained



[☯️ Project: project]
	model:	self.final_model
	quantify:	self.qdict
	x_columns:	['image']
	y_columns:	['label']

In [None]:
from PIL import Image

# Open one grizzly photo, convert it to 3-channel RGB, then resize it to 224x224.
img = Image.open("/GCI/data/bear_dataset/grizzly/00000099.jpg")
img = img.convert('RGB')
img = img.resize((224, 224))
img

In [9]:
# Predict for a single sample; the dict key matches the project's x_columns (['image']).
trained.predict({"image":img})

Unnamed: 0,category,score
0,grizzly,0.999911
1,teddys,7.8e-05
2,black,1.1e-05


In [None]:
# Inspect the phase configuration attached to the trained project.
trained.phase

In [None]:
# Inspect the `i2c` attribute of the label's category object —
# presumably the index-to-category mapping; TODO confirm.
trained.qdict['label'].category.i2c

### Netflix inference

In [1]:
from tai_chi_engine import TaiChiTrained
from pathlib import Path

# Point at the Netflix project folder and load the trained project from it.
PROJECT = Path("./netflix")
trained = TaiChiTrained(PROJECT)



HBox(children=(HTML(value="<div class='alert alert-info' role='alert'>\n         Assemble model, takes time</d…

In [2]:
# Echo the object to show its summary (project name, x/y columns).
trained

[☯️ Project: netflix]
	model:	self.final_model
	quantify:	self.qdict
	x_columns:	['description', 'cast', 'country', 'type_1']
	y_columns:	['listed_in']

In [3]:
# One sample to predict on; the keys match the project's x_columns
# (['description', 'cast', 'country', 'type_1']).
# NOTE(review): the description text contains typos ("jounalist", "first move");
# it is model input, so it is left untouched here — fix deliberately if desired.
data = {
    "description":"This movie tells a story of a freelancer jounalist found her romance when she first move into the west coast",
    "cast":"Jennifer Aniston,Paul Rudd",
    "country":"United States",
    "type_1":"Movie"
}

In [None]:
# Predict for the sample dict; per the summary above, the target column is 'listed_in'.
trained.predict(data)