In [1]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [45]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans

from sklearn.decomposition import PCA
from sklearn.neighbors import KNeighborsClassifier

In [46]:
# Data Loading
train_data = pd.read_csv('/content/drive/MyDrive/unsupervised-learning-m2023/Train.csv')
test_data = pd.read_csv('/content/drive/MyDrive/unsupervised-learning-m2023/Test.csv')

# Data Exploration and Preprocessing
# Explore the structure of the training data
print("Training Data Info:")
print(train_data.info())

# Explore the first few rows of the training data
print("\nTraining Data Sample:")
print(train_data.head())

# Check for missing values in the training data
missing_values = train_data.isnull().sum()
print("\nMissing Values in Training Data:")
print(missing_values)


Training Data Info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6840 entries, 0 to 6839
Columns: 6252 entries, 0 to Action_Person
dtypes: float64(6250), object(2)
memory usage: 326.3+ MB
None

Training Data Sample:
          0         1         2         3         4         5         6  \
0  8.130500  8.130500  8.160400  8.160300  8.160500  8.145400  8.145400   
1  1.034900  1.020200  1.020100  1.005200  1.027500  1.020100  1.049500   
2  5.421700  5.384300  5.362200  5.377000  5.347300  5.391900  5.451800   
3 -0.009461 -0.009368  0.015046  0.006892  0.008811  0.007001 -0.005710   
4  0.001915  0.023485  0.014330  0.018045  0.030433  0.029523  0.014217   

          7         8         9  ...      6242      6243      6244      6245  \
0  8.137600  8.130700  8.115500  ...  7.846600  7.846700  7.802100  7.839200   
1  0.997440  1.064600  1.020200  ...  1.460400  1.467900  1.461100  1.460600   
2  5.511100  5.392100  5.421500  ...  5.761100  5.723900  5.671300  5.723800   
3  0.010

In [47]:
# Data Exploration and Preprocessing
# Explore the structure of the training data
print("Test Data Info:")
print(test_data.info())

# Explore the first few rows of the training data
print("\nTest Data Sample:")
print(test_data.head())

# Check for missing values in the training data
missing_values = test_data.isnull().sum()
print("\nMissing Values in Test Data:")
print(missing_values)

Test Data Info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 68400 entries, 0 to 68399
Columns: 127 entries, 0 to ID
dtypes: float64(125), int64(1), object(1)
memory usage: 66.3+ MB
None

Test Data Sample:
          0         1         2        3         4         5         6  \
0 -0.005553 -0.005555 -0.001927  0.00362 -0.000057 -0.016367 -0.000747   
1 -0.506510 -0.503990 -0.509880 -0.51366 -0.516020 -0.521940 -0.519750   
2  1.129800  1.299100  1.307800  1.03980  0.574590  0.116000 -0.169270   
3 -1.000500 -0.993640 -1.015500 -0.95595 -0.971360 -0.986350 -0.926550   
4 -0.774860 -0.752180 -0.711310 -0.65723 -0.621070 -0.587910 -0.541140   

          7         8        9  ...       117       118       119       120  \
0 -0.009958 -0.016365 -0.01098  ...  0.025315  0.004389  0.003512  0.004473   
1 -0.512000 -0.504350 -0.49535  ... -0.502250 -0.506890 -0.507520 -0.502950   
2 -0.109780 -0.094517 -0.26583  ...  0.503220  0.929130  0.997340  1.140400   
3 -1.030600 -0.941640 -0.861

In [48]:
# Display basic statistics about the training data
train_data.describe()

# Check unique values in the 'Action_Person' column
unique_action_persons = train_data['Action_Person'].unique()
print(f"Unique Action_Persons: {unique_action_persons}")

# Check unique values in the 'Sensor' column
unique_sensors = train_data['Sensor'].unique()
print(f"Unique Sensors: {unique_sensors}")


Unique Action_Persons: ['a01_p1' 'a01_p2' 'a01_p3' 'a01_p4' 'a01_p5' 'a01_p6' 'a01_p7' 'a01_p8'
 'a02_p1' 'a02_p2' 'a02_p3' 'a02_p4' 'a02_p5' 'a02_p6' 'a02_p7' 'a02_p8'
 'a03_p1' 'a03_p2' 'a03_p3' 'a03_p4' 'a03_p5' 'a03_p6' 'a03_p7' 'a03_p8'
 'a04_p1' 'a04_p2' 'a04_p3' 'a04_p4' 'a04_p5' 'a04_p6' 'a04_p7' 'a04_p8'
 'a05_p1' 'a05_p2' 'a05_p3' 'a05_p4' 'a05_p5' 'a05_p6' 'a05_p7' 'a05_p8'
 'a06_p1' 'a06_p2' 'a06_p3' 'a06_p4' 'a06_p5' 'a06_p6' 'a06_p7' 'a06_p8'
 'a07_p1' 'a07_p2' 'a07_p3' 'a07_p4' 'a07_p5' 'a07_p6' 'a07_p7' 'a07_p8'
 'a08_p1' 'a08_p2' 'a08_p3' 'a08_p4' 'a08_p5' 'a08_p6' 'a08_p7' 'a08_p8'
 'a09_p1' 'a09_p2' 'a09_p3' 'a09_p4' 'a09_p5' 'a09_p6' 'a09_p7' 'a09_p8'
 'a10_p1' 'a10_p2' 'a10_p3' 'a10_p4' 'a10_p5' 'a10_p6' 'a10_p7' 'a10_p8'
 'a11_p1' 'a11_p2' 'a11_p3' 'a11_p4' 'a11_p5' 'a11_p6' 'a11_p7' 'a11_p8'
 'a12_p1' 'a12_p2' 'a12_p3' 'a12_p4' 'a12_p5' 'a12_p6' 'a12_p7' 'a12_p8'
 'a13_p1' 'a13_p2' 'a13_p3' 'a13_p4' 'a13_p5' 'a13_p6' 'a13_p7' 'a13_p8'
 'a14_p1' 'a14_p2' 'a14_p3' 

In [49]:

# Determine the number of rows per entry (125 columns per entry)
rows_per_entry = 125

# Determine the number of last columns to add (2 columns)
columns_to_add = 2

# Initialize an empty list to store the new rows
new_rows = []

# Iterate through the original data
for index, row in train_data.iterrows():
    # Split each row into multiple rows with 125 columns
    for i in range(0, len(row)-2, rows_per_entry):
        entry = row[i:i+rows_per_entry].values.tolist()
        # Add the last 2 columns explicitly to each entry
        entry.extend(row[-columns_to_add:].values.tolist())
        new_rows.append(entry)

# Create a new DataFrame from the modified rows
new_data = pd.DataFrame(new_rows)
new_data.columns = list(new_data.columns[:-2]) + ['Sensor', 'Action_Person']
# You can save this new dataset to a new CSV file if needed
# new_data.to_csv('new_dataset.csv', index=False)

# Display the first few rows of the new dataset
print(new_data.head())

        0       1       2       3       4       5       6       7       8  \
0  8.1305  8.1305  8.1604  8.1603  8.1605  8.1454  8.1454  8.1376  8.1307   
1  7.9665  7.9516  7.9889  7.9367  7.9365  7.9516  7.9515  7.9290  7.9290   
2  7.8917  7.9065  7.8692  7.8917  7.8991  7.9366  7.8917  7.9068  7.9213   
3  7.9366  7.9665  7.9067  7.9218  7.9512  7.9291  7.9068  7.9439  7.9217   
4  7.8913  7.8696  7.8622  7.8920  7.9068  7.9369  7.9743  8.0113  7.9821   

        9  ...     117     118     119     120     121     122     123  \
0  8.1155  ...  7.9515  7.9292  7.9367  7.9515  7.9442  7.9517  7.9743   
1  7.9364  ...  7.9214  7.8619  7.9512  7.9290  7.9067  7.9364  7.9067   
2  7.9140  ...  7.9663  7.9216  7.9216  7.9289  7.9290  7.9216  7.9439   
3  7.9216  ...  7.9066  7.9217  7.9219  8.0711  8.0104  7.7576  7.6901   
4  7.9287  ...  7.9665  7.9665  7.9964  7.9065  7.9369  8.0559  8.1305   

      124  Sensor  Action_Person  
0  7.9812  T-xacc         a01_p1  
1  7.8991  T-xacc     

In [50]:
# Data Exploration and Preprocessing
# Explore the structure of the training data
print("New Data Info:")
print(new_data.info())

# Explore the first few rows of the training data
print("\nNew Data Sample:")
print(new_data.head())

# Check for missing values in the training data
missing_values = new_data.isnull().sum()
print("\nMissing Values in New Data:")
print(missing_values)

New Data Info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 342000 entries, 0 to 341999
Columns: 127 entries, 0 to Action_Person
dtypes: float64(125), object(2)
memory usage: 331.4+ MB
None

New Data Sample:
        0       1       2       3       4       5       6       7       8  \
0  8.1305  8.1305  8.1604  8.1603  8.1605  8.1454  8.1454  8.1376  8.1307   
1  7.9665  7.9516  7.9889  7.9367  7.9365  7.9516  7.9515  7.9290  7.9290   
2  7.8917  7.9065  7.8692  7.8917  7.8991  7.9366  7.8917  7.9068  7.9213   
3  7.9366  7.9665  7.9067  7.9218  7.9512  7.9291  7.9068  7.9439  7.9217   
4  7.8913  7.8696  7.8622  7.8920  7.9068  7.9369  7.9743  8.0113  7.9821   

        9  ...     117     118     119     120     121     122     123  \
0  8.1155  ...  7.9515  7.9292  7.9367  7.9515  7.9442  7.9517  7.9743   
1  7.9364  ...  7.9214  7.8619  7.9512  7.9290  7.9067  7.9364  7.9067   
2  7.9140  ...  7.9663  7.9216  7.9216  7.9289  7.9290  7.9216  7.9439   
3  7.9216  ...  7.9066  7.92

In [51]:
# Define the number of clusters, activities, and persons
num_clusters = 152
num_activities = 19
num_persons = 8

# Initialize an empty dictionary for cluster mapping
cluster_mapping = {}

# Use a nested for loop to populate the cluster_mapping dictionary
cluster_label = 0
for activity_label in unique_action_persons:
    # for person_label in range(num_persons):
      cluster_mapping[cluster_label] = activity_label
      cluster_label += 1
cluster_mapping= {v: k for k, v in cluster_mapping.items()}
# Print the resulting cluster_mapping dictionary
print(cluster_mapping)


{'a01_p1': 0, 'a01_p2': 1, 'a01_p3': 2, 'a01_p4': 3, 'a01_p5': 4, 'a01_p6': 5, 'a01_p7': 6, 'a01_p8': 7, 'a02_p1': 8, 'a02_p2': 9, 'a02_p3': 10, 'a02_p4': 11, 'a02_p5': 12, 'a02_p6': 13, 'a02_p7': 14, 'a02_p8': 15, 'a03_p1': 16, 'a03_p2': 17, 'a03_p3': 18, 'a03_p4': 19, 'a03_p5': 20, 'a03_p6': 21, 'a03_p7': 22, 'a03_p8': 23, 'a04_p1': 24, 'a04_p2': 25, 'a04_p3': 26, 'a04_p4': 27, 'a04_p5': 28, 'a04_p6': 29, 'a04_p7': 30, 'a04_p8': 31, 'a05_p1': 32, 'a05_p2': 33, 'a05_p3': 34, 'a05_p4': 35, 'a05_p5': 36, 'a05_p6': 37, 'a05_p7': 38, 'a05_p8': 39, 'a06_p1': 40, 'a06_p2': 41, 'a06_p3': 42, 'a06_p4': 43, 'a06_p5': 44, 'a06_p6': 45, 'a06_p7': 46, 'a06_p8': 47, 'a07_p1': 48, 'a07_p2': 49, 'a07_p3': 50, 'a07_p4': 51, 'a07_p5': 52, 'a07_p6': 53, 'a07_p7': 54, 'a07_p8': 55, 'a08_p1': 56, 'a08_p2': 57, 'a08_p3': 58, 'a08_p4': 59, 'a08_p5': 60, 'a08_p6': 61, 'a08_p7': 62, 'a08_p8': 63, 'a09_p1': 64, 'a09_p2': 65, 'a09_p3': 66, 'a09_p4': 67, 'a09_p5': 68, 'a09_p6': 69, 'a09_p7': 70, 'a09_p8': 71, '

In [52]:
# Extract the numerical columns for scaling
numerical_columns = new_data.columns[:125]  # Exclude 'Sensor' and 'Action_Person'
print(numerical_columns)
numerical_columns

Index([  0,   1,   2,   3,   4,   5,   6,   7,   8,   9,
       ...
       115, 116, 117, 118, 119, 120, 121, 122, 123, 124],
      dtype='object', length=125)


Index([  0,   1,   2,   3,   4,   5,   6,   7,   8,   9,
       ...
       115, 116, 117, 118, 119, 120, 121, 122, 123, 124],
      dtype='object', length=125)

In [53]:
new_data.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,117,118,119,120,121,122,123,124,Sensor,Action_Person
0,8.1305,8.1305,8.1604,8.1603,8.1605,8.1454,8.1454,8.1376,8.1307,8.1155,...,7.9515,7.9292,7.9367,7.9515,7.9442,7.9517,7.9743,7.9812,T-xacc,a01_p1
1,7.9665,7.9516,7.9889,7.9367,7.9365,7.9516,7.9515,7.929,7.929,7.9364,...,7.9214,7.8619,7.9512,7.929,7.9067,7.9364,7.9067,7.8991,T-xacc,a01_p1
2,7.8917,7.9065,7.8692,7.8917,7.8991,7.9366,7.8917,7.9068,7.9213,7.914,...,7.9663,7.9216,7.9216,7.9289,7.929,7.9216,7.9439,7.9588,T-xacc,a01_p1
3,7.9366,7.9665,7.9067,7.9218,7.9512,7.9291,7.9068,7.9439,7.9217,7.9216,...,7.9066,7.9217,7.9219,8.0711,8.0104,7.7576,7.6901,7.809,T-xacc,a01_p1
4,7.8913,7.8696,7.8622,7.892,7.9068,7.9369,7.9743,8.0113,7.9821,7.9287,...,7.9665,7.9665,7.9964,7.9065,7.9369,8.0559,8.1305,8.0486,T-xacc,a01_p1


In [54]:
# Group the data by the "Sensor" column
sensor_groups = new_data.groupby('Sensor')

# Create a dictionary to store separate DataFrames for each sensor
sensor_data_dict = {}

# Iterate through the groups and store them in the dictionary
for sensor, data in sensor_groups:
    sensor_data_dict[sensor] = data

# Now, sensor_data_dict contains separate DataFrames for each sensor
# You can access each sensor's data using dictionary keys, e.g., sensor_data_dict['T-xacc']


In [55]:
# Assuming sensor_data_dict is your dictionary containing sensor data
number_of_sensors = len(sensor_data_dict.keys())
print("Number of sensors:", number_of_sensors)


Number of sensors: 45


In [56]:
print(sensor_data_dict)

{'LA-xacc':               0        1        2         3         4        5        6  \
900      3.4193   3.4340   3.4291   3.44370   3.44860   3.4437   3.4389   
901      3.4143   3.4242   3.3972   3.42420   3.41680   3.4193   3.3974   
902      3.3755   3.3706   3.3877   3.38520   3.38030   3.3949   3.3974   
903      3.3803   3.3705   3.3973   3.36570   3.38030   3.3460   3.3997   
904      3.4264   3.3703   3.3314   3.34600   3.40940   3.4434   3.4434   
...         ...      ...      ...       ...       ...      ...      ...   
340695   5.7488   5.6833  15.9840  12.05600  10.03600   2.0622  20.6970   
340696  -8.3488  -6.4033  -6.2621   0.94088  -0.60201  -1.1687  -2.6198   
340697  15.0180  25.5410  15.7630  17.77600  14.76600  12.0620   9.9501   
340698 -17.5960 -22.3650  -2.0017  67.59100  13.05400   1.7273   9.6623   
340699   3.9239   3.9755   5.1190   6.02760   6.24470   6.5794   6.4998   

              7        8        9  ...      117      118      119       120  \
900     

In [57]:
for i,j in sensor_data_dict.items():
  print(j)
  break

              0        1        2         3         4        5        6  \
900      3.4193   3.4340   3.4291   3.44370   3.44860   3.4437   3.4389   
901      3.4143   3.4242   3.3972   3.42420   3.41680   3.4193   3.3974   
902      3.3755   3.3706   3.3877   3.38520   3.38030   3.3949   3.3974   
903      3.3803   3.3705   3.3973   3.36570   3.38030   3.3460   3.3997   
904      3.4264   3.3703   3.3314   3.34600   3.40940   3.4434   3.4434   
...         ...      ...      ...       ...       ...      ...      ...   
340695   5.7488   5.6833  15.9840  12.05600  10.03600   2.0622  20.6970   
340696  -8.3488  -6.4033  -6.2621   0.94088  -0.60201  -1.1687  -2.6198   
340697  15.0180  25.5410  15.7630  17.77600  14.76600  12.0620   9.9501   
340698 -17.5960 -22.3650  -2.0017  67.59100  13.05400   1.7273   9.6623   
340699   3.9239   3.9755   5.1190   6.02760   6.24470   6.5794   6.4998   

              7        8        9  ...      117      118      119       120  \
900      3.4340   3.

In [58]:
import pandas as pd
import numpy as np
from sklearn.neighbors import NearestNeighbors
from scipy.stats import mode
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import accuracy_score
import pandas as pd
import numpy as np
from sklearn.cluster import KMeans
from scipy.stats import mode
from sklearn.decomposition import PCA
from scipy.stats import skew, kurtosis
from scipy.fft import fft  # Import the fft function from scipy.fft
from scipy.signal import find_peaks
# Assuming 'sensor_data_dict' is a dictionary containing sensor-specific data
# with 'Sensor' as the key and the sensor's data as the DataFrame value.
# Modify the variable names accordingly.



sensors = list(train_data['Sensor'].unique())
hold_ans = []

cols = 125
def calculate_statistical_features(inp_df):
    stat_df = pd.DataFrame()
    stat_df['Mean'] = inp_df.mean(axis=1)
    stat_df['Median'] = inp_df.median.median(axis=1)
    # stat_df['Mean1'] = inp_df.mean(axis=1)
    # stat_df['Median1'] = inp_df.median.median(axis=1)
    # stat_df['Mean2'] = inp_df.mean(axis=1)
    # stat_df['Median2'] = inp_df.median.median(axis=1)
    # stat_df['Mean3'] = inp_df.mean(axis=1)
    # stat_df['Median3'] = inp_df.median.median(axis=1)
    # stat_df['Mean4'] = inp_df.mean(axis=1)
    # stat_df['Median4'] = inp_df.median.median(axis=1)
    # stat_df['Variance'] = inp_df.var(axis=1)
    # stat_df['Min'] = inp_df.min(axis=1)
    # stat_df['Max'] = inp_df.max(axis=1)
    # stat_df['Std_Deviation'] = inp_df.std(axis=1)
    # stat_df['Skew'] = stat_df.iloc[:, 0:cols].apply(lambda x: skew(x), axis=1)

    return stat_df

# def calculate_fft_features(inp_df):
#     transformed_np = inp_df.select_dtypes(include=np.number).to_numpy()

#     # Calculate FFT features for each row and return them directly
#     print(np.abs(np.fft.fft(transformed_np, axis=1)[:, :125]))
#     return np.abs(np.fft.fft(transformed_np, axis=1)[:, :125])


def new_stats(inp_df):
    statistical_features = calculate_statistical_features(inp_df)
    fft_features = calculate_fft_features(inp_df)

    out_df = pd.concat([inp_df, statistical_features], axis=1)

    return out_df

def calculate_additional_features(data):
    # data['Std_Deviation'] = data.iloc[:, 0:cols].std(axis=1)
    data['Mean'] = data.iloc[:, 0:cols].mean(axis=1)
    data['Median'] = data.iloc[:, 0:cols].median(axis=1)
    data['Mean1'] = data.iloc[:, 0:cols].mean(axis=1)
    data['Median1'] = data.iloc[:, 0:cols].median(axis=1)
    data['Mean2'] = data.iloc[:, 0:cols].mean(axis=1)
    data['Median2'] = data.iloc[:, 0:cols].median(axis=1)
    data['Mean3'] = data.iloc[:, 0:cols].mean(axis=1)
    data['Median3'] = data.iloc[:, 0:cols].median(axis=1)
    # data['Max'] = data.iloc[:, 0:cols].max(axis=1)
    # data['Min'] = data.iloc[:, 0:cols].min(axis=1)
    #auto corr media
    # data['Variance'] = data.iloc[:, 0:cols].var(axis=1)
    # data['Skew'] = data.iloc[:, 0:cols].apply(lambda x: skew(x), axis=1)
    # data['Kurtosis'] = data.iloc[:, 0:cols].apply(lambda x: kurtosis(x), axis=1)
    return data





new_data_np = new_data.select_dtypes(include=np.number).to_numpy()

# Calculate FFT features for each row and return them directly
new_data_fft = np.abs(np.fft.fft(new_data_np, axis=1)[:, :125])

test_data_cpy = test_data.copy()
test_data_cpy = test_data_cpy.drop('ID', axis=1)
test_data_np = test_data_cpy.select_dtypes(include=np.number).to_numpy()

# Calculate FFT features for each row and return them directly
test_data_fft = np.abs(np.fft.fft(test_data_np, axis=1)[:, :125])


# print(test_data_fft)




def calculate_fft_features(inp_df):
    transformed_np = inp_df.select_dtypes(include=np.number).to_numpy()
    transformed_fft = np.abs(np.fft.fft(transformed_np, axis=1)[:, :125])

    return pd.DataFrame(transformed_fft, columns=[str(i) for i in range(125)])

def replace_columns_with_fft(data):
    fft_features = calculate_fft_features(data)
    data = data.drop(data.select_dtypes(include=np.number).columns, axis=1)  # Drop original numeric columns
    data = pd.concat([data, fft_features], axis=1)  # Concatenate FFT features

    return data

new_data = replace_columns_with_fft(new_data)
test_data_cpy = test_data.copy()
test_data_cpy = test_data_cpy.drop('ID', axis=1)
test_data_cpy = replace_columns_with_fft(test_data_cpy)
test_data_cpy['ID'] = test_data['ID']

# print(test_data_cpy)
print(new_data)
test_data = test_data_cpy
print(test_data)


# new_data_fft['Action_Person'] = new_data['Action_Person']
# new_data_fft['Sensor'] = new_data['Sensor']


# test_data_fft['ID'] = test_data['ID']
# test_data_fft['Sensor'] = test_data['Sensor']

# new_data = new_data_fft
# test_data = test_data_fft




# new_data = new_stats(new_data)
# test_data = new_stats(test_data)
# transformed_np = transformed_df.select_dtypes(include=np.number).to_numpy()

# # Calculate FFT features for training data
# transformed_fft = np.abs(np.fft.fft(transformed_np, axis=1))


         Sensor Action_Person           0          1         2         3  \
0        T-xacc        a01_p1  996.964300   9.666240  1.568899  0.767884   
1        T-xacc        a01_p1  997.281200   6.428833  2.430362  1.015020   
2        T-xacc        a01_p1  996.361700   4.226515  1.441549  0.188068   
3        T-xacc        a01_p1  992.301500   2.843812  1.352425  0.351981   
4        T-xacc        a01_p1  988.616200   1.839532  0.925161  0.574614   
...         ...           ...         ...        ...       ...       ...   
341995  LL-zmag        a19_p8   14.084998  12.521777  9.543620  4.328994   
341996  LL-zmag        a19_p8   65.404510   7.182267  1.309905  2.370184   
341997  LL-zmag        a19_p8   69.001810   7.686105  6.948886  5.826607   
341998  LL-zmag        a19_p8   69.819000   4.692527  1.966101  2.512156   
341999  LL-zmag        a19_p8   60.273680   2.857196  0.907283  3.077532   

               4         5         6         7  ...       115       116  \
0       1.06

In [59]:
# Extract the first two columns
first_two_columns = new_data.iloc[:, :2]

# Remove the first two columns from the DataFrame
new_data = new_data.drop(new_data.columns[:2], axis=1)

# Concatenate the first two columns to the end of the DataFrame
new_data = pd.concat([new_data, first_two_columns], axis=1)


In [60]:
# Extract the first two columns
first_one_columns = test_data.iloc[:, :1]

# Remove the first two columns from the DataFrame
test_data = test_data.drop(test_data.columns[:1], axis=1)

# Concatenate the first two columns to the end of the DataFrame
test_data = pd.concat([test_data, first_one_columns], axis=1)


In [61]:
new_data.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,117,118,119,120,121,122,123,124,Sensor,Action_Person
0,996.9643,9.66624,1.568899,0.767884,1.068505,1.995355,0.702969,0.433404,0.850263,0.596228,...,0.850263,0.433404,0.702969,1.995355,1.068505,0.767884,1.568899,9.66624,T-xacc,a01_p1
1,997.2812,6.428833,2.430362,1.01502,1.263887,0.471838,0.404185,0.819379,0.125459,0.46306,...,0.125459,0.819379,0.404185,0.471838,1.263887,1.01502,2.430362,6.428833,T-xacc,a01_p1
2,996.3617,4.226515,1.441549,0.188068,0.506063,0.520032,0.041368,0.354703,0.196871,0.248057,...,0.196871,0.354703,0.041368,0.520032,0.506063,0.188068,1.441549,4.226515,T-xacc,a01_p1
3,992.3015,2.843812,1.352425,0.351981,0.728914,1.011964,0.669194,0.704757,0.244105,0.506071,...,0.244105,0.704757,0.669194,1.011964,0.728914,0.351981,1.352425,2.843812,T-xacc,a01_p1
4,988.6162,1.839532,0.925161,0.574614,0.426568,0.393549,0.373807,0.721115,0.740414,0.42512,...,0.740414,0.721115,0.373807,0.393549,0.426568,0.574614,0.925161,1.839532,T-xacc,a01_p1


In [62]:
test_data.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,117,118,119,120,121,122,123,124,ID,Sensor
0,0.316171,0.481516,0.235888,0.206684,0.301267,0.198386,0.180089,0.142567,0.190976,0.094143,...,0.190976,0.142567,0.180089,0.198386,0.301267,0.206684,0.235888,0.481516,11435,T-zgyro
1,63.42234,0.80654,0.546547,0.27196,0.126446,0.556396,0.362667,0.233555,0.233866,1.220799,...,0.233866,0.233555,0.362667,0.556396,0.126446,0.27196,0.546547,0.80654,44936,LA-zmag
2,12.09139,5.107543,5.485173,19.766282,11.266815,2.940304,13.920087,4.914253,5.292393,14.401886,...,5.292393,4.914253,13.920087,2.940304,11.266815,19.766282,5.485173,5.107543,48630,RL-xgyro
3,119.35772,0.132982,1.644258,0.542818,0.631334,0.167316,0.080025,0.071638,0.142079,0.225983,...,0.142079,0.071638,0.080025,0.167316,0.631334,0.542818,1.644258,0.132982,496,T-yacc
4,74.66903,1.770446,1.705041,2.526851,7.664523,9.122744,2.990507,1.797488,1.322396,3.288134,...,1.322396,1.797488,2.990507,9.122744,7.664523,2.526851,1.705041,1.770446,33154,RL-ymag


In [63]:
new_data = calculate_additional_features(new_data)
test_data = calculate_additional_features(test_data)

In [64]:
print(new_data)

                 0          1         2         3         4         5  \
0       996.964300   9.666240  1.568899  0.767884  1.068505  1.995355   
1       997.281200   6.428833  2.430362  1.015020  1.263887  0.471838   
2       996.361700   4.226515  1.441549  0.188068  0.506063  0.520032   
3       992.301500   2.843812  1.352425  0.351981  0.728914  1.011964   
4       988.616200   1.839532  0.925161  0.574614  0.426568  0.393549   
...            ...        ...       ...       ...       ...       ...   
341995   14.084998  12.521777  9.543620  4.328994  6.149058  2.046966   
341996   65.404510   7.182267  1.309905  2.370184  2.033874  2.438995   
341997   69.001810   7.686105  6.948886  5.826607  0.788474  2.852041   
341998   69.819000   4.692527  1.966101  2.512156  2.088042  2.235498   
341999   60.273680   2.857196  0.907283  3.077532  0.677607  5.133851   

               6         7         8         9  ...   Sensor  Action_Person  \
0       0.702969  0.433404  0.850263  0.5962

In [65]:
print(sensors)
for sensor in sensors:
    X = new_data[new_data['Sensor'] == sensor]
    Z = test_data[test_data['Sensor'] == sensor]

    label_encoder = LabelEncoder()
    X = X.sample(frac=1).reset_index(drop=True)
    y = label_encoder.fit_transform(X['Action_Person'])
    X = X.drop(['Action_Person', 'Sensor'], axis=1)

    id = Z['ID']
    Z = Z.drop(['Sensor', 'ID'], axis=1)
    Z.columns = [int(col) if isinstance(col, str) and col.isdigit() else col for col in Z.columns]

    # Standardize the features using StandardScaler
    # scaler = StandardScaler()
    # X = scaler.fit_transform(X)
    # X = calculate_additional_features(X)
    # Z = calculate_additional_features(Z)
    X.columns = X.columns.astype(str)
    Z.columns = Z.columns.astype(str)


    model = NearestNeighbors(n_neighbors=1,p=1,metric='manhattan')
    model.fit(X)

    dist, idx = model.kneighbors(X)
    preds = np.array([mode(y[neighbors]).mode for neighbors in idx])
    acc = accuracy_score(preds, y)
    print(f"Accuracy for {sensor} (Train Data): {acc}")

    # Standardize the test data using the same scaler
    # Z = scaler.transform(Z)

    dist, idx = model.kneighbors(Z)
    ans = np.array([mode(y[neighbors]).mode for neighbors in idx])
    ans_df = pd.DataFrame({'ID': id, 'Target': ans})
    hold_ans.append(ans_df)
# feate st mean median vaar etc
output = pd.concat(hold_ans, ignore_index=True)
output.to_csv("SubmissionN.csv", index=False)


['T-xacc', 'T-yacc', 'T-zacc', 'T-xgyro', 'T-ygyro', 'T-zgyro', 'T-xmag', 'T-ymag', 'T-zmag', 'RA-xacc', 'RA-yacc', 'RA-zacc', 'RA-xgyro', 'RA-ygyro', 'RA-zgyro', 'RA-xmag', 'RA-ymag', 'RA-zmag', 'LA-xacc', 'LA-yacc', 'LA-zacc', 'LA-xgyro', 'LA-ygyro', 'LA-zgyro', 'LA-xmag', 'LA-ymag', 'LA-zmag', 'RL-xacc', 'RL-yacc', 'RL-zacc', 'RL-xgyro', 'RL-ygyro', 'RL-zgyro', 'RL-xmag', 'RL-ymag', 'RL-zmag', 'LL-xacc', 'LL-yacc', 'LL-zacc', 'LL-xgyro', 'LL-ygyro', 'LL-zgyro', 'LL-xmag', 'LL-ymag', 'LL-zmag']
Accuracy for T-xacc (Train Data): 1.0
Accuracy for T-yacc (Train Data): 1.0
Accuracy for T-zacc (Train Data): 1.0
Accuracy for T-xgyro (Train Data): 1.0
Accuracy for T-ygyro (Train Data): 1.0
Accuracy for T-zgyro (Train Data): 1.0
Accuracy for T-xmag (Train Data): 1.0
Accuracy for T-ymag (Train Data): 1.0
Accuracy for T-zmag (Train Data): 1.0
Accuracy for RA-xacc (Train Data): 1.0
Accuracy for RA-yacc (Train Data): 1.0
Accuracy for RA-zacc (Train Data): 1.0
Accuracy for RA-xgyro (Train Data): 