In [1]:
from scipy.io.wavfile import read, write
import librosa

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import os

from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import make_classification

from Audio_Preprocess import *

In [2]:
# Import data
npz = np.load('Sentiment_analysis_data.npz')
x = npz['inputs']
y = npz['targets']

In [3]:
# Standarise data
scaled_x = preprocessing.scale(x)

In [4]:
# Shuffle data
n = x.shape[0]
shuffled_indices = np.arange(n)
np.random.shuffle(shuffled_indices)

shuffled_x = scaled_x[shuffled_indices]
shuffled_y = y[shuffled_indices]

In [5]:
# Merge inputs and targets
data = add_label_arrays(shuffled_x, np.expand_dims(shuffled_y, axis=1))

In [6]:
# Create dataframe from numpy arrays
columns = [ 'c' + str(i) for i in range(28) ] + [ 'label' ]
df = pd.DataFrame(data=data, columns=columns)
df.head(5)

Unnamed: 0,c0,c1,c2,c3,c4,c5,c6,c7,c8,c9,...,c19,c20,c21,c22,c23,c24,c25,c26,c27,label
0,0.3431628650878422,1.159931215544506,-0.6722793478148194,0.613737809687941,0.8671790916383292,0.0709244571672214,-0.9743372478695403,-1.5048087359158218,0.9941466448667023,1.5178366343533465,...,-0.3349579814560467,1.581160145084519,0.5639332120523426,1.1445082664316335,-0.3357706193051832,1.6438633130013935,1.4700974120537167,0.8904749590317185,-0.8768771621142933,sad
1,0.3089545921118043,-1.5825778872148428,1.4248200036292369,0.7674760335829872,0.57351480086283,1.1020624647597066,-0.9237548098987672,-0.2486700195540332,-0.3674583137056594,-1.1121059325057765,...,1.712472296286189,0.1394273918521374,1.1583172961740618,0.6759672262532609,1.4632453978603188,-0.0851440958355541,-1.0634095782014796,0.3701441392047316,-0.8073163687330629,happy
2,-1.662875417425459,0.0371227308318545,-1.1620087438042763,-1.7105163780122423,-1.6902133675353668,-1.6093519276238055,0.5040925207951985,0.6156167851233635,-1.4709878292314231,0.2313486775745487,...,-0.631884750528149,-0.8484223349672926,-1.5158034067902593,-1.446942333321158,-1.308912304553516,-0.9738603696757568,-0.7624844050686826,-1.6969555560320944,1.6040261580395567,happy
3,1.0107579602258088,0.3855239408384842,0.4094680879898581,0.3293025347413142,0.2495194750342077,0.4363650056968779,1.39399953697311,1.1378619703464905,0.8442994980703807,-0.6370793794221187,...,-0.7456295643019912,-0.8721652019693638,-0.206447101436137,-0.3735331593637344,0.1814375259983843,-0.5848588474900814,0.3557965712164369,0.4363364577956476,0.080167372807792,sad


In [7]:
# Create dummy labels
df.label = pd.get_dummies(df.label, drop_first=True)

In [8]:
# Create training and testing sets
X = df.iloc[:,:-1]
y = df['label']

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25)
print(X_train.shape, y_train.shape)

(3, 28) (3,)


In [14]:
# Define Random Forest Classifier model and predict label
rf = RandomForestClassifier(max_depth=2, random_state=0)
rf.fit(X, y)
rf.predict(X_test)

array([0], dtype=uint8)

In [15]:
# Get probability of prediction
rf.predict_proba(X_test)

array([[0.75, 0.25]])

In [16]:
# Get score of prediction
rf.score(X_test, y_test)

1.0