In [22]:
# Import Dependencies
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.metrics import accuracy_score,confusion_matrix,classification_report
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
import pandas as pd

In [5]:
# Loading mock dataset for Provisional Machine Learning Model
movies_df = pd.read_csv("Potential_Datasets/datasets_2745_4700_movies.csv", encoding = "ISO-8859-1")
movies_df.head()

Unnamed: 0,budget,company,country,director,genre,gross,name,rating,released,runtime,score,star,votes,writer,year
0,8000000,Columbia Pictures Corporation,USA,Rob Reiner,Adventure,52287414,Stand by Me,R,8/22/1986,89,8.1,Wil Wheaton,299174,Stephen King,1986
1,6000000,Paramount Pictures,USA,John Hughes,Comedy,70136369,Ferris Bueller's Day Off,PG-13,6/11/1986,103,7.8,Matthew Broderick,264740,John Hughes,1986
2,15000000,Paramount Pictures,USA,Tony Scott,Action,179800601,Top Gun,PG,5/16/1986,110,6.9,Tom Cruise,236909,Jim Cash,1986
3,18500000,Twentieth Century Fox Film Corporation,USA,James Cameron,Action,85160248,Aliens,R,7/18/1986,137,8.4,Sigourney Weaver,540152,James Cameron,1986
4,9000000,Walt Disney Pictures,USA,Randal Kleiser,Adventure,18564613,Flight of the Navigator,PG,8/1/1986,90,6.9,Joey Cramer,36636,Mark H. Baker,1986


# Performing inital cleaning of data

In [6]:
# Droping Identification column
movies_df_clean = movies_df.copy()
movies_df_clean = movies_df_clean.drop(["name"], axis=1)
movies_df_clean.head()

Unnamed: 0,budget,company,country,director,genre,gross,rating,released,runtime,score,star,votes,writer,year
0,8000000,Columbia Pictures Corporation,USA,Rob Reiner,Adventure,52287414,R,8/22/1986,89,8.1,Wil Wheaton,299174,Stephen King,1986
1,6000000,Paramount Pictures,USA,John Hughes,Comedy,70136369,PG-13,6/11/1986,103,7.8,Matthew Broderick,264740,John Hughes,1986
2,15000000,Paramount Pictures,USA,Tony Scott,Action,179800601,PG,5/16/1986,110,6.9,Tom Cruise,236909,Jim Cash,1986
3,18500000,Twentieth Century Fox Film Corporation,USA,James Cameron,Action,85160248,R,7/18/1986,137,8.4,Sigourney Weaver,540152,James Cameron,1986
4,9000000,Walt Disney Pictures,USA,Randal Kleiser,Adventure,18564613,PG,8/1/1986,90,6.9,Joey Cramer,36636,Mark H. Baker,1986


In [8]:
# Generate our categorical variable list
movies_cat = movies_df_clean.dtypes[movies_df_clean.dtypes == "object"].index.tolist()

# Check the number of unique values in each column
movies_df_clean[movies_cat].nunique()

company     2179
country       57
director    2759
genre         17
rating        13
released    2403
star        2504
writer      4199
dtype: int64

In [10]:
# Create a OneHotEncoder instance
enc = OneHotEncoder(sparse=False)

# Fit and transform the OneHotEncoder using the categorical variable list
encode_df = pd.DataFrame(enc.fit_transform(movies_df_clean[movies_cat]))

# Add the encoded variable names to the DataFrame
encode_df.columns = enc.get_feature_names(movies_cat)
encode_df.head()

Unnamed: 0,"company_""DIA"" Productions GmbH & Co. KG",company_1+2 Seisaku Iinkai,company_101st Street Films,company_10th Hole Productions,company_120 Films,company_13 Productions,company_1492 Pictures,company_1821 Pictures,company_19 Entertainment,company_1984 Private Defense Contractors,...,writer_Zoe Heller,writer_Zoe Kazan,writer_Zoë Lund,writer_Àlex Pastor,writer_Álex de la Iglesia,writer_Álvaro del Amo,writer_Åke Sandgren,writer_Émile Gaudreault,writer_Éric Rohmer,writer_Éva Gárdos
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [11]:
# Merge one-hot encoded features and drop the originals
movies_df_clean = movies_df_clean.merge(encode_df,left_index=True, right_index=True)
movies_df_clean = movies_df_clean.drop(movies_cat,1)
movies_df_clean.head()

Unnamed: 0,budget,gross,runtime,score,votes,year,"company_""DIA"" Productions GmbH & Co. KG",company_1+2 Seisaku Iinkai,company_101st Street Films,company_10th Hole Productions,...,writer_Zoe Heller,writer_Zoe Kazan,writer_Zoë Lund,writer_Àlex Pastor,writer_Álex de la Iglesia,writer_Álvaro del Amo,writer_Åke Sandgren,writer_Émile Gaudreault,writer_Éric Rohmer,writer_Éva Gárdos
0,8000000,52287414,89,8.1,299174,1986,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,6000000,70136369,103,7.8,264740,1986,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,15000000,179800601,110,6.9,236909,1986,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,18500000,85160248,137,8.4,540152,1986,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,9000000,18564613,90,6.9,36636,1986,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [16]:
# Converting score to recomendation value
movies_df_clean.loc[movies_df_clean.score >= 7.0, "recomendation"] = 2
movies_df_clean.loc[(movies_df_clean.score < 7.0) & (movies_df_clean.score > 5.0), "recomendation"] = 1
movies_df_clean.loc[movies_df_clean.score <= 5.0, "recomendation"] = 0
movies_df_clean.head()

Unnamed: 0,budget,gross,runtime,score,votes,year,"company_""DIA"" Productions GmbH & Co. KG",company_1+2 Seisaku Iinkai,company_101st Street Films,company_10th Hole Productions,...,writer_Zoe Kazan,writer_Zoë Lund,writer_Àlex Pastor,writer_Álex de la Iglesia,writer_Álvaro del Amo,writer_Åke Sandgren,writer_Émile Gaudreault,writer_Éric Rohmer,writer_Éva Gárdos,recomendation
0,8000000,52287414,89,8.1,299174,1986,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0
1,6000000,70136369,103,7.8,264740,1986,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0
2,15000000,179800601,110,6.9,236909,1986,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
3,18500000,85160248,137,8.4,540152,1986,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0
4,9000000,18564613,90,6.9,36636,1986,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0


# Establishing Provisional Machine Learning Model

In [17]:
# Remove loan status target from features data
y = movies_df_clean.recomendation
X = movies_df_clean.drop(columns=["score","recomendation"])

# Split training/test datasets
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0, stratify=y)

# Create a StandardScaler instance
scaler = StandardScaler()

# Fit the StandardScaler
X_scaler_train = scaler.fit(X_train)
X_scaler_test = scaler.fit(X_test)

# Scale the data
X_train_scaled = X_scaler_train.transform(X_train)
X_test_scaled = X_scaler_test.transform(X_test)

In [18]:
# Create a balanced random forest classifier and fit the model
rf_model = RandomForestClassifier(n_estimators=1000, max_depth=10, random_state=0).fit(X_train_scaled, y_train)

# Evaluate the model
y_pred = rf_model.predict(X_test_scaled)
print(f" Random forest predictive accuracy: {accuracy_score(y_test,y_pred):.3f}")

 Random forest predictive accuracy: 0.605


In [23]:
# Calculate the confusion matrix.
confusion_matrix(y_test, y_pred)

array([[   0,  159,    0],
       [   0, 1032,    0],
       [   0,  514,    0]], dtype=int64)

In [25]:
# Print the imbalanced classification report
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

         0.0       0.00      0.00      0.00       159
         1.0       0.61      1.00      0.75      1032
         2.0       0.00      0.00      0.00       514

    accuracy                           0.61      1705
   macro avg       0.20      0.33      0.25      1705
weighted avg       0.37      0.61      0.46      1705



  _warn_prf(average, modifier, msg_start, len(result))


In [26]:
# List the features sorted in descending order by feature importance
feature_importances = sorted(zip(rf_model.feature_importances_, X.columns), reverse=True)

for feature in feature_importances:
       print(f"{feature[1]}: ({feature[0]})")

votes: (0.06371314481516457)
runtime: (0.05160870144131583)
country_USA: (0.03145142754670382)
gross: (0.02900456169075385)
genre_Biography: (0.024995890194256437)
genre_Drama: (0.016293328435266454)
genre_Action: (0.014848241195901989)
budget: (0.01356569492017112)
genre_Comedy: (0.012961654658728918)
genre_Horror: (0.012347223830174864)
country_Italy: (0.011536011012455251)
year: (0.0103193514857753)
genre_Crime: (0.00952684885660269)
rating_PG-13: (0.008381816233909994)
country_France: (0.008342850864320623)
country_India: (0.008278951373386962)
rating_NOT RATED: (0.0076239643566441495)
writer_Quentin Tarantino: (0.006511388263929087)
rating_Not specified: (0.005953005831184741)
director_Martin Scorsese: (0.005516782284980918)
country_Japan: (0.00514770048510946)
country_New Zealand: (0.005122106618754622)
country_Spain: (0.0050921325300668195)
country_Argentina: (0.004752033662654931)
star_Aamir Khan: (0.004628558528134237)
country_Sweden: (0.004458309908829899)
director_Mike Leigh

director_Vincent Gallo: (0.00013160914958974823)
released_4/26/2013: (0.00013148148900775703)
writer_Jarrad Paul: (0.00013141798169225637)
company_Hit Entertainment: (0.0001313552738420712)
star_Eric Balfour: (0.00013114622483841683)
star_Timothy Spall: (0.00013096268990686906)
company_Pathé Pictures International: (0.00013080621957409856)
released_11/26/2003: (0.00013067038953349297)
released_7/19/2000: (0.00013034516650516805)
company_Legendary Entertainment: (0.00013025880220411717)
director_Nick Hurran: (0.000130237395717187)
star_Jeremy Renner: (0.0001302327293517162)
director_Tommy Wiseau: (0.00013018344504225904)
released_11/5/2004: (0.00013000992598710407)
writer_Philippe Caland: (0.0001299088483112078)
star_Nicole Eggert: (0.0001297271602700858)
released_4/10/2015: (0.00012957390511466224)
released_7/14/2006: (0.00012937960803066146)
star_Brooke Shields: (0.00012925522759005704)
director_Dennis Hopper: (0.0001292506534546624)
star_Darren Robinson: (0.000129192022721611)
star_D

star_Jeni Courtney: (5.3628853052714034e-05)
star_Kelly Clarkson: (5.360522846863214e-05)
company_Black Bear Pictures: (5.357927769357598e-05)
company_Reprisal Films: (5.3570227207634174e-05)
star_Richard Chamberlain: (5.350328025181266e-05)
writer_Sherman Alexie: (5.3500409783420066e-05)
writer_Dominic Anciano: (5.347013468906757e-05)
company_J&M; Entertainment: (5.3409977024781564e-05)
company_40 Acres & A Mule Filmworks: (5.33409410464161e-05)
writer_Dennis Lehane: (5.32826484537926e-05)
star_Leigh McCormack: (5.3231592977184715e-05)
star_Helen Mirren: (5.3202565256835394e-05)
star_Kevin Bacon: (5.316066312983481e-05)
writer_Alexandre Dumas: (5.3089216997821424e-05)
writer_Catherine Breillat: (5.3054018080760255e-05)
star_Mark Wahlberg: (5.3043494012594626e-05)
company_FilmColony: (5.300626382030886e-05)
writer_Larry Sulkis: (5.2985057435471853e-05)
star_Ingrid Bolsø Berdal: (5.295672132195925e-05)
star_Sophia Loren: (5.290678238230776e-05)
star_Kerry Fox: (5.288843979953608e-05)
di

company_The Noel Gay Motion Picture Company: (1.9059396902957866e-05)
writer_Stuart Beattie: (1.9056238143770097e-05)
company_Gemini Films: (1.9039375536574597e-05)
released_9/24/1999: (1.9025207243831005e-05)
company_Rex Entertainment: (1.902337251255827e-05)
director_Mathieu Kassovitz: (1.9019172534429542e-05)
director_Ralph S. Singleton: (1.9016825638819678e-05)
released_6/24/2005: (1.89959914330122e-05)
star_Eric Bana: (1.8995712853890717e-05)
released_7/9/2014: (1.894812394910907e-05)
director_Ben Wheatley: (1.8940419347056488e-05)
star_Emmanuel Garijo: (1.8917216774347064e-05)
director_Andrew Davis: (1.8911160403890243e-05)
director_Bob Rafelson: (1.8887963388283903e-05)
writer_Jan Sardi: (1.8885586812716196e-05)
released_5/26/2011: (1.8881824280585568e-05)
released_5/11/2012: (1.887332811992733e-05)
writer_John Travis: (1.887243078317632e-05)
writer_Ken Kolb: (1.8858274900823752e-05)
released_5/22/1998: (1.8854385197466107e-05)
released_5/10/2001: (1.882703003451471e-05)
company

director_David Dobkin: (9.72122427494597e-06)
director_Richard Rush: (9.720648726791739e-06)
star_Dana Carvey: (9.688575861494311e-06)
star_Chloë Grace Moretz: (9.67931703635222e-06)
director_Richard Williams: (9.667684419833029e-06)
star_Danny Aiello: (9.635456082719753e-06)
released_9/19/2008: (9.635387715778975e-06)
writer_Mark Verheiden: (9.624278059495666e-06)
writer_Mitch Markowitz: (9.62234196279857e-06)
director_Ari Folman: (9.594339854845661e-06)
released_2/17/1999: (9.593991089064243e-06)
company_Screen Australia: (9.570869355098453e-06)
released_5/6/1988: (9.56835201421363e-06)
company_Madhouse: (9.56554342542127e-06)
writer_Richard Maltby Jr.: (9.559193581748954e-06)
company_Wytwórnia Filmów Dokumentalnych i Fabularnych (WFDiF): (9.55794647346694e-06)
company_Les Films du Lendemain: (9.556062721045664e-06)
writer_Kelly Masterson: (9.549969571765696e-06)
company_FX Sound: (9.545146501673178e-06)
director_David Bowers: (9.540205927233246e-06)
writer_Jonathan Marc Feldman: (9.

writer_Tom Flynn: (0.0)
writer_Tom Eyen: (0.0)
writer_Tom DeSanto: (0.0)
writer_Tom Davis: (0.0)
writer_Tom Butterworth: (0.0)
writer_Tom Bradby: (0.0)
writer_Tom Bleecker: (0.0)
writer_Tom Benedek: (0.0)
writer_Todd Strasser: (0.0)
writer_Todd Stephens: (0.0)
writer_Todd Robinson: (0.0)
writer_Todd McFarlane: (0.0)
writer_Todd Louiso: (0.0)
writer_Todd Graff: (0.0)
writer_Todd Field: (0.0)
writer_Todd Farmer: (0.0)
writer_Todd Casey: (0.0)
writer_Todd Alcott: (0.0)
writer_Tod Carroll: (0.0)
writer_Tine Byrckel: (0.0)
writer_Tina Gordon Chism: (0.0)
writer_Tin-suen Chan: (0.0)
writer_Tin Nam Chun: (0.0)
writer_Timur Bekmambetov: (0.0)
writer_Tim Talbott: (0.0)
writer_Tim Sullivan: (0.0)
writer_Tim Robbins: (0.0)
writer_Tim Rice: (0.0)
writer_Tim Meadows: (0.0)
writer_Tim Krabbé: (0.0)
writer_Tim Kelleher: (0.0)
writer_Tim Kazurinsky: (0.0)
writer_Tim Herlihy: (0.0)
writer_Tim Heidecker: (0.0)
writer_Tim Conway: (0.0)
writer_Tim Burns: (0.0)
writer_Thornton Wilder: (0.0)
writer_Thomas V

writer_Javier Abad: (0.0)
writer_Jason Robert Brown: (0.0)
writer_Jason Pagan: (0.0)
writer_Jason Keller: (0.0)
writer_Jason Fuchs: (0.0)
writer_Jason Filardi: (0.0)
writer_Jason A. Micallef: (0.0)
writer_Jarre Fees: (0.0)
writer_Jared Stern: (0.0)
writer_Jared Bush: (0.0)
writer_Janus Cercone: (0.0)
writer_Janet Scott Batchler: (0.0)
writer_Janet Kovalcik: (0.0)
writer_Jane Weinstock: (0.0)
writer_Jane Smiley: (0.0)
writer_Jane Hamilton: (0.0)
writer_Jane Anderson: (0.0)
writer_Jana Howington: (0.0)
writer_Jan Sharp: (0.0)
writer_Jan Guillou: (0.0)
writer_Jan Fischer: (0.0)
writer_Jamie Uys: (0.0)
writer_Jamie Linden: (0.0)
writer_James Watkins: (0.0)
writer_James Ward Byrkit: (0.0)
writer_James V. Simpson: (0.0)
writer_James V. Hart: (0.0)
writer_James Schamus: (0.0)
writer_James Patterson: (0.0)
writer_James Orr: (0.0)
writer_James Oliver Curwood: (0.0)
writer_James Moran: (0.0)
writer_James Merendino: (0.0)
writer_James Melkonian: (0.0)
writer_James McFarland: (0.0)
writer_James Mc

star_Rod Steiger: (0.0)
star_Robyn Lively: (0.0)
star_Robin Weigert: (0.0)
star_Robin Tunney: (0.0)
star_Robin Shou: (0.0)
star_Robert Wuhl: (0.0)
star_Robert Wisdom: (0.0)
star_Robert Stadlober: (0.0)
star_Robert Sean Leonard: (0.0)
star_Robert Redford: (0.0)
star_Robert Lindsay: (0.0)
star_Robert Hoffman: (0.0)
star_Robert Carlyle: (0.0)
star_Robert Arkins: (0.0)
star_Rob Lowe: (0.0)
star_Rob Corddry: (0.0)
star_Riz Ahmed: (0.0)
star_Ricky Gervais: (0.0)
star_Ricky Busker: (0.0)
star_Rick Yune: (0.0)
star_Richard T. Jones: (0.0)
star_Richard Pryor: (0.0)
star_Richard Linklater: (0.0)
star_Richard Jenkins: (0.0)
star_Richard Harris: (0.0)
star_Richard Grieco: (0.0)
star_Richard Farnsworth: (0.0)
star_Richard Attenborough: (0.0)
star_Richard Armitage: (0.0)
star_Richard Alexander: (0.0)
star_Ricardo Meneses: (0.0)
star_Rhys Wakefield: (0.0)
star_Rhona Mitra: (0.0)
star_Renaud: (0.0)
star_Rena Owen: (0.0)
star_Regina King: (0.0)
star_Reece Thompson: (0.0)
star_Red Steagall: (0.0)
star_R

released_7/1/2016: (0.0)
released_7/1/2011: (0.0)
released_7/1/2010: (0.0)
released_7/1/2009: (0.0)
released_7/1/2005: (0.0)
released_7/1/1998: (0.0)
released_7/1/1987: (0.0)
released_6/9/2017: (0.0)
released_6/9/1995: (0.0)
released_6/8/2001: (0.0)
released_6/7/2013: (0.0)
released_6/7/2002: (0.0)
released_6/7/1991: (0.0)
released_6/6/2013: (0.0)
released_6/6/2008: (0.0)
released_6/6/2006: (0.0)
released_6/6/2003: (0.0)
released_6/6/1986: (0.0)
released_6/5/2015: (0.0)
released_6/5/2014: (0.0)
released_6/5/2009: (0.0)
released_6/5/1996: (0.0)
released_6/5/1992: (0.0)
released_6/4/1999: (0.0)
released_6/4/1994: (0.0)
released_6/30/2010: (0.0)
released_6/30/2006: (0.0)
released_6/3/2015: (0.0)
released_6/3/2010: (0.0)
released_6/3/2005: (0.0)
released_6/3/1994: (0.0)
released_6/3/1988: (0.0)
released_6/29/1994: (0.0)
released_6/29/1990: (0.0)
released_6/28/2013: (0.0)
released_6/28/2012: (0.0)
released_6/28/2006: (0.0)
released_6/28/2002: (0.0)
released_6/28/2000: (0.0)
released_6/28/19

director_Eugene Levy: (0.0)
director_Eugene Corr: (0.0)
director_Ethan Maniquis: (0.0)
director_Etan Cohen: (0.0)
director_Erik Skjoldbjærg: (0.0)
director_Eric Till: (0.0)
director_Eric Summer: (0.0)
director_Eric Schaeffer: (0.0)
director_Eric Meza: (0.0)
director_Eric Leighton: (0.0)
director_Eric Karson: (0.0)
director_Eric Blakeney: (0.0)
director_Ellory Elkayem: (0.0)
director_Elizabeth Allen Rosenbaum: (0.0)
director_Eliseo Subiela: (0.0)
director_Eli Roth: (0.0)
director_Elaine May: (0.0)
director_Efram Potelle: (0.0)
director_Edward Norton: (0.0)
director_Edward James Olmos: (0.0)
director_Eddie Murphy: (0.0)
director_Ed Decter: (0.0)
director_E. Max Frye: (0.0)
director_E. Elias Merhige: (0.0)
director_Dylan Kidd: (0.0)
director_Dyan Cannon: (0.0)
director_Duwayne Dunham: (0.0)
director_Dustin Hoffman: (0.0)
director_Duncan Tucker: (0.0)
director_Duncan Gibbins: (0.0)
director_Duke Johnson: (0.0)
director_Drew Goddard: (0.0)
director_Drew Barrymore: (0.0)
director_Drake Dorem

company_Company Films: (0.0)
company_Compact Yellowbill: (0.0)
company_Commies From Mars Corporation: (0.0)
company_Comedy Central Films: (0.0)
company_Columbus Circle Films: (0.0)
company_Columbia TriStar Home Video: (0.0)
company_Columbia Pictures Industries: (0.0)
company_Color Force: (0.0)
company_Collective Studios, The: (0.0)
company_Codeblack Entertainment: (0.0)
company_Code Red: (0.0)
company_Code Entertainment: (0.0)
company_Cocktail Film: (0.0)
company_Cobalt Media Group: (0.0)
company_Coach Carter: (0.0)
company_Club d'Investissement Média: (0.0)
company_Cloud Atlas Productions: (0.0)
company_Clipsal Films: (0.0)
company_Clinica Estetico: (0.0)
company_Cliffjack Motion Pictures: (0.0)
company_Clear Type: (0.0)
company_Clear Productions: (0.0)
company_Clavius Base: (0.0)
company_Claussen & Wöbke Filmproduktion GmbH: (0.0)
company_Class 5 Films: (0.0)
company_Cinépix Film Properties (CFP): (0.0)
company_Cineritino S.r.L.: (0.0)
company_Cinerenta Medienbeteiligungs KG: (0.0)
c

In [31]:
# Another potential machine learning option
SCV_model = SVC(decision_function_shape="ovo").fit(X_train_scaled, y_train)

y_pred_SCV = SCV_model.predict(X_test_scaled)
round(SCV_model.score(X_test_scaled, y_test), 4)

0.6235

In [32]:
# Evaluate the model
print(f" SCV predictive accuracy: {accuracy_score(y_test,y_pred_SCV):.3f}")

 SCV predictive accuracy: 0.623


In [33]:
# Calculate the confusion matrix.
confusion_matrix(y_test, y_pred_SCV)

array([[   1,  158,    0],
       [   0, 1011,   21],
       [   0,  463,   51]], dtype=int64)

In [34]:
# Print the imbalanced classification report
print(classification_report(y_test, y_pred_SCV))

              precision    recall  f1-score   support

         0.0       1.00      0.01      0.01       159
         1.0       0.62      0.98      0.76      1032
         2.0       0.71      0.10      0.17       514

    accuracy                           0.62      1705
   macro avg       0.78      0.36      0.32      1705
weighted avg       0.68      0.62      0.51      1705

