In [None]:
! pip install kaggle
! mkdir ~/.kaggle
! cp kaggle.json ~/.kaggle/
! chmod 600 ~/.kaggle/kaggle.json
! kaggle competitions download -c unsupervised-learning-m2023

! unzip unsupervised-learning-m2023.zip

In [58]:

import pandas as pd
import numpy as np
from sklearn.cluster import KMeans
from scipy.stats import skew, kurtosis
from scipy.stats import kurtosis, entropy, skew
from scipy.signal import correlate
from scipy.fftpack import fft
from scipy.signal import find_peaks
from scipy.fftpack import fft
train_data = pd.read_csv('/content/Train.csv')
grouped = train_data.groupby('Sensor')

new_data = []

for sensor_value, group_data in grouped:
    reshaped_group = []


    for _, row in group_data.iterrows():
        data_row = row.values[:-2]
        sub_rows = np.array_split(data_row, 50)
        sensor_column = [sensor_value] * 50
        action_person_column = [row['Action_Person']] * 50
        sub_rows_with_sensor = np.column_stack((sensor_column, action_person_column, sub_rows))
        reshaped_group.extend(sub_rows_with_sensor)


    new_data.extend(reshaped_group)


columns = ['Sensor', 'Action_Person'] + [str(i) for i in range(1, 126)]
reshaped_df = pd.DataFrame(new_data, columns=columns)


actions = ['a{:02d}'.format(i) for i in range(1, 20)]
people = ['p{:d}'.format(i) for i in range(1, 9)]

class_to_integer = {}
integer = 0
for action in actions:
    for person in people:
        class_name = f'{action}_{person}'
        class_to_integer[class_name] = integer
        integer += 1



In [59]:
reshaped_df['Pulse_Indicator_False'] = reshaped_df.loc[:, '1':'125'].apply(
    lambda x: len(find_peaks(x, height=0.5)[0]) == 0, axis=1)
reshaped_df['Energy'] = np.mean(reshaped_df.loc[:, '1':'125']**2, axis=1)
reshaped_df['RMS'] = np.sqrt(np.mean(reshaped_df.loc[:, '1':'125']**2, axis=1))
reshaped_df['Power'] = np.mean(reshaped_df.loc[:, '1':'125']**2, axis=1)
reshaped_df['Skewness'] = reshaped_df.loc[:, '1':'125'].apply(skew, axis=1)
reshaped_df['Peak'] = reshaped_df.loc[:, '1':'125'].apply(lambda x: np.max(np.abs(x)), axis=1)
reshaped_df['Crest_Factor'] = reshaped_df['Peak'] / reshaped_df['RMS']
reshaped_df['Kurtosis'] = reshaped_df.loc[:, '1':'125'].apply(kurtosis, axis=1)
reshaped_df['Mean'] = reshaped_df.loc[:, '1':'125'].mean(axis=1)
reshaped_df['StdDev'] = reshaped_df.loc[:, '1':'125'].std(axis=1)
reshaped_df['Median'] = reshaped_df.loc[:, '1':'125'].median(axis=1)
reshaped_df['P2P'] = reshaped_df.loc[:, '1':'125'].apply(lambda x: np.ptp(x), axis=1)
data_columns = [str(i) for i in range(1, 126)]


for i in range(1, 16):
    col_name = f'Fourier_{i}'
    reshaped_df[col_name] = np.abs(fft(reshaped_df[data_columns], n=125)[:, i])


for i in range(3):
    lag = i + 1
    autocorrelation = reshaped_df.loc[:, '1':'125'].apply(
        lambda x: np.correlate(x, np.roll(x, lag), mode='valid')[0], axis=1)
    reshaped_df[f'Autocorrelation_{lag}'] = autocorrelation


In [None]:
clustering_data = reshaped_df[[
    'Autocorrelation_1',

    'Pulse_Indicator_False', 'Energy', 'RMS',  'Skewness','P2P',
    'Peak',  'Kurtosis', 'Mean', 'StdDev','Median',
    'Fourier_1','Fourier_2','Fourier_3','Fourier_4','Fourier_5',
    'Fourier_6','Fourier_7','Fourier_8','Fourier_9','Fourier_10',
    'Fourier_11','Fourier_12','Fourier_13','Fourier_14','Fourier_15'
]]
clustering_data

In [72]:
from sklearn.neighbors import NearestNeighbors
knn_models = {}
for sensor_value in reshaped_df['Sensor'].unique():
    sensor_data = reshaped_df[reshaped_df['Sensor'] == sensor_value]
    knn = NearestNeighbors(n_neighbors=5, algorithm='ball_tree')
    features = sensor_data[clustering_data.columns].values
    knn.fit(features)
    knn_models[sensor_value] = knn

In [62]:
test_data = pd.read_csv('/content/Test.csv')

from scipy.stats import kurtosis, entropy, skew
from scipy.signal import correlate
from scipy.fftpack import fft
from scipy.signal import find_peaks


test_data['Pulse_Indicator_False'] = test_data.loc[:, '0':'124'].apply(
    lambda x: len(find_peaks(x, height=0.5)[0]) == 0, axis=1)
test_data['Energy'] = np.mean(test_data.loc[:, '0':'124']**2, axis=1)
test_data['RMS'] = np.sqrt(np.mean(test_data.loc[:, '0':'124']**2, axis=1))
test_data['Power'] = np.mean(test_data.loc[:, '0':'124']**2, axis=1)
test_data['Skewness'] = test_data.loc[:, '0':'124'].apply(skew, axis=1)
test_data['Peak'] = test_data.loc[:, '0':'124'].apply(lambda x: np.max(np.abs(x)), axis=1)
test_data['Crest_Factor'] = test_data['Peak'] / test_data['RMS']
test_data['Kurtosis'] = test_data.loc[:, '0':'124'].apply(kurtosis, axis=1)
test_data['Mean'] = test_data.loc[:, '0':'124'].mean(axis=1)
test_data['StdDev'] = test_data.loc[:, '0':'124'].std(axis=1)
test_data['Median'] = test_data.loc[:, '0':'124'].median(axis=1)
test_data['P2P'] = test_data.loc[:, '0':'124'].apply(lambda x: np.ptp(x), axis=1)


data_columns = [str(i) for i in range(125)]


for i in range(1, 16):
    col_name = f'Fourier_{i}'
    test_data[col_name] = np.abs(fft(test_data[data_columns], n=125)[:, i])

for i in range(3):
    lag = i + 1
    autocorrelation = test_data.loc[:, '0':'124'].apply(
        lambda x: np.correlate(x, np.roll(x, lag), mode='valid')[0], axis=1)
    test_data[f'Autocorrelation_{lag}'] = autocorrelation


In [73]:
from collections import Counter

for index, row in test_data.iterrows():
    sensor_value = row[125]
    knn = knn_models.get(sensor_value)

    if knn is not None:
        features = row[['Autocorrelation_1',
                       'Pulse_Indicator_False', 'Energy', 'RMS', 'Skewness', 'P2P',
                       'Peak', 'Kurtosis', 'Mean', 'StdDev', 'Median',
                       'Fourier_1', 'Fourier_2', 'Fourier_3', 'Fourier_4', 'Fourier_5',
                       'Fourier_6', 'Fourier_7', 'Fourier_8', 'Fourier_9', 'Fourier_10',
                       'Fourier_11', 'Fourier_12', 'Fourier_13', 'Fourier_14', 'Fourier_15']]

        distances, indices = knn.kneighbors([features], n_neighbors=5)
        action_person_values = reshaped_df.loc[indices[0], 'Action_Person'].tolist()
        act_per = [class_to_integer[i] for i in action_person_values]


        weighted_votes = Counter()
        for i, action_person in enumerate(act_per):

            weight = 1 / (distances[0][i] + 1e-6)
            weighted_votes[action_person] += weight
        most_common_value = weighted_votes.most_common(1)[0][0]
        test_data.at[index, 'Action_Person'] = most_common_value
test_ids = test_data['ID']
submission_data = {
    "ID": test_ids,
    "TARGET": test_data['Action_Person'].astype(int)
}
df_Submission = pd.DataFrame(submission_data)
df_Submission.to_csv('Submission.csv', index=False)

In [65]:
test_data

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,Fourier_10,Fourier_11,Fourier_12,Fourier_13,Fourier_14,Fourier_15,Autocorrelation_1,Autocorrelation_2,Autocorrelation_3,Action_Person
0,-0.005553,-0.005555,-0.001927,0.003620,-0.000057,-0.016367,-0.000747,-0.009958,-0.016365,-0.010980,...,0.045482,0.173617,0.075016,0.184227,0.106758,0.045073,0.008572,0.008358,0.005982,52.0
1,-0.506510,-0.503990,-0.509880,-0.513660,-0.516020,-0.521940,-0.519750,-0.512000,-0.504350,-0.495350,...,0.332241,0.213463,0.136382,0.085865,0.103005,0.090272,32.228786,32.218633,32.204731,23.0
2,1.129800,1.299100,1.307800,1.039800,0.574590,0.116000,-0.169270,-0.109780,-0.094517,-0.265830,...,16.276733,12.576402,7.073188,7.019461,5.378773,2.473391,26.442286,17.646610,8.635733,108.0
3,-1.000500,-0.993640,-1.015500,-0.955950,-0.971360,-0.986350,-0.926550,-1.030600,-0.941640,-0.861130,...,0.193631,0.201342,0.197892,0.199989,0.138363,0.073225,113.812729,114.079124,114.036532,21.0
4,-0.774860,-0.752180,-0.711310,-0.657230,-0.621070,-0.587910,-0.541140,-0.483030,-0.423670,-0.381250,...,0.715235,0.656922,0.562581,0.289567,0.896042,0.478125,47.430247,47.096534,46.637797,73.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
68395,-0.135500,-0.886300,-0.188440,-0.029687,0.510920,0.209590,-0.555300,0.292200,-0.059148,-0.058811,...,0.501111,13.099167,13.045619,20.059701,16.414172,17.105472,83.434428,50.933488,65.183620,59.0
68396,3.845800,3.890500,3.860600,3.875700,3.890600,3.875600,3.897700,3.860700,3.875600,3.860700,...,0.060018,0.164073,0.475433,0.445855,0.243978,0.418400,1857.591908,1857.592897,1857.573294,20.0
68397,-0.049571,-0.104900,-0.208050,-0.258960,-0.313430,-0.397730,-0.467360,-0.540420,-0.587290,-0.627270,...,1.811265,1.285587,1.174066,1.880125,0.727064,0.707259,19.436043,18.876480,18.186128,79.0
68398,-0.106560,-0.106210,-0.104960,-0.107040,-0.105120,-0.106120,-0.105700,-0.106280,-0.105070,-0.105540,...,0.003004,0.014547,0.005456,0.004861,0.011988,0.012957,1.355736,1.355732,1.355719,6.0
