#### Import libraries

In [178]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from imblearn.over_sampling import SMOTE

#### Load the dataset

In [179]:
# Read the four source CSVs from the parent directory.
# Forward slashes are used for every path: they work on Windows, Linux and
# macOS, whereas a backslash is a separator on Windows only and sequences
# such as '\g' and '\c' are invalid string escapes.
good = pd.read_csv('../good horn.csv')
resonator = pd.read_csv('../resonator.csv')
centermass = pd.read_csv('../centermass.csv')
diaphragm = pd.read_csv('../diaphragm.csv')

# Row count of the shortest file; every frame is truncated to this length in
# the next cell. (Despite the name this counts rows here; the rows only become
# columns after the later transpose.)
max_cols = min(len(good), len(resonator), len(centermass), len(diaphragm))

In [180]:
# Keep only the first `max_cols` rows of each frame so all four share one length.
good = good.iloc[:max_cols]
resonator = resonator.iloc[:max_cols]
centermass = centermass.iloc[:max_cols]
diaphragm = diaphragm.iloc[:max_cols]

In [181]:
# Index each frame by the values of its first column, then drop the 'Time(s)'
# column so only the remaining columns are left.
# NOTE(review): assumes the first column of every CSV is 'Time(s)' — confirm.
good = good.set_index(good.iloc[:, 0]).drop(columns='Time(s)')

resonator = resonator.set_index(resonator.iloc[:, 0]).drop(columns='Time(s)')

centermass = centermass.set_index(centermass.iloc[:, 0]).drop(columns='Time(s)')

diaphragm = diaphragm.set_index(diaphragm.iloc[:, 0]).drop(columns='Time(s)')

In [182]:
# Swap rows and columns: the index values become the column labels and each
# original column becomes one row.
good = good.T
resonator = resonator.T
centermass = centermass.T
diaphragm = diaphragm.T

In [183]:
# Preview the first rows of the transposed `good` frame.
good.head()

Time(s),0.00,0.02,0.04,0.06,0.08,0.10,0.12,0.14,0.16,0.18,...,21.84,21.86,21.88,21.90,21.92,21.94,21.96,21.98,22.00,22.02
1,0.3531,0.017,-0.3108,0.0597,0.1928,-0.0188,-0.0078,-0.0344,-0.0243,-0.3884,...,-0.2273,0.478,0.0266,-0.3062,-0.0051,0.1983,-0.0014,-0.0064,-0.0441,0.0565
2,0.2301,-0.106,-0.4338,-0.0633,0.0698,-0.1418,-0.1308,-0.1574,-0.1473,-0.5114,...,-0.3503,0.355,-0.0964,-0.4292,-0.1281,0.0753,-0.1244,-0.1294,-0.1671,-0.0665
3,0.1301,-0.206,-0.5338,-0.1633,-0.0302,-0.2418,-0.2308,-0.2574,-0.2473,-0.6114,...,-0.4503,0.255,-0.1964,-0.5292,-0.2281,-0.0247,-0.2244,-0.2294,-0.2671,-0.1665
4,0.0301,-0.306,-0.6338,-0.2633,-0.1302,-0.3418,-0.3308,-0.3574,-0.3473,-0.7114,...,-0.5503,0.155,-0.2964,-0.6292,-0.3281,-0.1247,-0.3244,-0.3294,-0.3671,-0.2665
5,0.1231,-0.213,-0.5408,-0.1703,-0.0372,-0.2488,-0.2378,-0.2644,-0.2543,-0.6184,...,-0.4573,0.248,-0.2034,-0.5362,-0.2351,-0.0317,-0.2314,-0.2364,-0.2741,-0.1735


In [184]:
# Attach an integer class label to every row of each frame:
# 0 = good, 1 = resonator, 2 = centermass, 3 = diaphragm.
good = good.assign(target=0)
resonator = resonator.assign(target=1)
centermass = centermass.assign(target=2)
diaphragm = diaphragm.assign(target=3)

#### Compile all together

In [185]:
# Stack the four labelled frames on top of each other; ignore_index discards
# the per-frame row labels and renumbers the rows from 0.
compiled = pd.concat(
    (good, resonator, centermass, diaphragm),
    axis='index',
    ignore_index=True,
)

In [186]:
# Write the combined, labelled dataset to disk first, then end the cell on
# head(): a notebook only renders a cell's last expression, so a head() call
# placed before to_csv() would display nothing.
compiled.to_csv('compiled.csv')
compiled.head()

### Oversample with SMOTE

In [187]:
# Separate the class labels (y) from the feature columns (X).
y = compiled['target']
X = compiled.drop(columns='target')

#### Apply SMOTE

In [188]:
# Request 1000 samples for each of the four classes (labels 0-3).
# SMOTE draws its synthetic samples at random, so a fixed random_state is set
# to make the resampled data (and the CSV written from it) reproducible
# across kernel restarts.
smote = SMOTE(
    sampling_strategy={0: 1000, 1: 1000, 2: 1000, 3: 1000},
    random_state=42,
)

In [189]:
# Fit SMOTE on the compiled features/labels and return the resampled
# features and labels.
x_resamp, y_resamp = smote.fit_resample(X, y)

In [190]:
# Wrap both resampled outputs as DataFrames and join them side by side so the
# labels sit next to the feature columns.
x_resamp, y_resamp = pd.DataFrame(x_resamp), pd.DataFrame(y_resamp)
oversampled_data = pd.concat((x_resamp, y_resamp), axis='columns')

In [191]:
# Write the oversampled dataset (features plus target) to the working directory.
oversampled_data.to_csv('oversampled.csv')

### Check the oversampled data

In [192]:
# Count rows per class label to confirm the classes are balanced after SMOTE.
oversampled_data['target'].value_counts()

target
0    1000
1    1000
2    1000
3    1000
Name: count, dtype: int64

In [193]:
# Summary statistics of the dataset before oversampling.
compiled.describe()

Time(s),0.0,0.02,0.04,0.06,0.08,0.1,0.12,0.14,0.16,0.18,...,21.86,21.88,21.9,21.92,21.94,21.96,21.98,22.0,22.02,target
count,49.0,49.0,49.0,49.0,49.0,49.0,49.0,49.0,49.0,49.0,...,49.0,49.0,49.0,49.0,49.0,49.0,49.0,49.0,49.0,49.0
mean,0.034553,-0.103669,-0.161178,-0.035133,-0.044661,-0.050549,-0.06252,-0.157763,-0.046104,-0.201967,...,0.017486,0.041041,-0.2214,-0.067533,-0.155561,-0.116412,0.010147,0.146692,-0.057484,1.469388
std,0.187082,0.184812,0.258814,0.196912,0.251454,0.242751,0.204898,0.241421,0.188188,0.297263,...,0.273878,0.267854,0.250915,0.183456,0.347781,0.209701,0.209909,0.282736,0.248352,1.138384
min,-0.7514,-0.7979,-0.8355,-0.7596,-0.9874,-0.7937,-0.8402,-0.9303,-0.7449,-0.8538,...,-0.6878,-0.832,-0.9044,-0.7025,-0.9433,-0.8049,-0.7031,-0.5571,-0.8832,0.0
25%,-0.0579,-0.1979,-0.3622,-0.1373,-0.1741,-0.193,-0.1802,-0.2703,-0.1473,-0.4554,...,-0.183,-0.1368,-0.4032,-0.1701,-0.5073,-0.2244,-0.1034,-0.0691,-0.188,0.0
50%,0.0619,-0.1027,-0.0901,-0.0319,-0.0372,-0.056,-0.045,-0.1574,-0.0243,-0.1211,...,0.114,-0.0134,-0.1937,-0.0461,-0.0479,-0.1244,0.0339,0.2229,-0.0757,1.0
75%,0.1375,0.0121,0.0099,0.0537,0.0958,0.0258,0.0153,-0.0653,0.0737,-0.0352,...,0.18,0.2062,-0.0911,0.0927,0.0753,0.0051,0.1199,0.3778,0.0315,2.0
max,0.3531,0.2357,0.2599,0.353,0.4218,0.4533,0.3477,0.3347,0.2823,0.3306,...,0.478,0.5442,0.2563,0.238,0.3348,0.2837,0.4012,0.5557,0.4595,3.0


In [175]:
# Summary statistics of the oversampled dataset, for comparison with the
# pre-oversampling statistics of `compiled`.
oversampled_data.describe()

Unnamed: 0,0.0,0.02,0.04,0.06,0.08,0.1,0.12,0.14,0.16,0.18,...,21.86,21.88,21.9,21.92,21.94,21.96,21.98,22.0,22.02,target
count,4000.0,4000.0,4000.0,4000.0,4000.0,4000.0,4000.0,4000.0,4000.0,4000.0,...,4000.0,4000.0,4000.0,4000.0,4000.0,4000.0,4000.0,4000.0,4000.0,4000.0
mean,0.043717,-0.090383,-0.142259,-0.021308,-0.033808,-0.035409,-0.047859,-0.144533,-0.030758,-0.182283,...,0.023692,0.057142,-0.203833,-0.053033,-0.147133,-0.103008,0.026292,0.166467,-0.044058,1.5
std,0.150131,0.157551,0.240744,0.170771,0.22266,0.221921,0.178394,0.218721,0.162524,0.281707,...,0.247189,0.246073,0.232499,0.159366,0.330006,0.186346,0.186768,0.265163,0.224895,1.118174
min,-0.7514,-0.7979,-0.8355,-0.7596,-0.9874,-0.7937,-0.8402,-0.9303,-0.7449,-0.8538,...,-0.6878,-0.832,-0.9044,-0.7025,-0.9433,-0.8049,-0.7031,-0.5571,-0.8832,0.0
25%,-0.038029,-0.185587,-0.345328,-0.131438,-0.177,-0.185728,-0.166832,-0.272274,-0.140483,-0.433505,...,-0.176242,-0.115273,-0.395695,-0.16846,-0.514544,-0.219077,-0.106145,-0.074706,-0.174064,0.75
50%,0.064603,-0.091837,-0.059231,-0.019617,-0.046568,-0.059718,-0.03953,-0.158818,-0.012281,-0.110584,...,0.115036,-0.017836,-0.160441,-0.047445,-0.046211,-0.116221,0.062341,0.244621,-0.076801,1.5
75%,0.133733,0.011286,0.009301,0.05426,0.100473,0.033695,0.027848,-0.065247,0.073305,-0.034923,...,0.184212,0.213326,-0.087383,0.103289,0.070904,0.007031,0.124263,0.385753,0.038186,2.25
max,0.3531,0.2357,0.2599,0.353,0.4218,0.4533,0.3477,0.3347,0.2823,0.3306,...,0.478,0.5442,0.2563,0.238,0.3348,0.2837,0.4012,0.5557,0.4595,3.0
