# Team SCaLAR NITK
# Team Leader : A D Mahit Nandan
# Email: mahitnandanad@gmail.com / mahitnandanad.211ai001@nitk.edu.in

### Requirements

- Pandas version: 2.0.3
- NumPy version: 1.24.3
- scikit-learn version: 1.4.0

If necessary, uncomment and run the cell below to pip-install the modules listed above.

In [None]:
# !pip install pandas==2.0.3
# !pip install numpy==1.24.3
# !pip install scikit-learn==1.4.0

# Development Phase

## Run the cells below for the development phase

### Folder Structure (for Development Phase)

- **train folder:** This folder contains 360 input CSV files used for training.

- **validation folder:** This folder contains 180 input CSV files used for validation.

- **train_labels.csv:** This CSV file contains the labels corresponding to the training data in the train folder.

- **main.ipynb:** This Jupyter Notebook contains the main code for the development phase.



In [17]:
import pandas as pd
import os

# Build one feature row per training recording and save the table to disk.
# Each row holds the per-column mean of the standardised signals followed by
# the recording's class label.
labels_table = pd.read_csv('train_labels.csv')

compressed_data = []
for filename, label in zip(labels_table['filename'], labels_table['class']):
    # Read the recording from the train folder and discard its time axis
    signals = pd.read_csv(os.path.join('train', filename)).drop(columns=['Time'])

    # Constant columns have zero spread; divide those by 1 instead of by 0
    spread = signals.std()
    spread = spread.mask(spread == 0, 1)

    # Standardise every column: subtract its mean, divide by its std
    standardised = (signals - signals.mean()) / spread

    # NOTE(review): each column was just mean-centred, so its mean is
    # mathematically zero and the stored features are only floating-point
    # rounding residue. Confirm this is the intended feature.
    mean_values = standardised.mean().tolist() + [label]
    compressed_data.append(mean_values)

# Features are named column_1..column_N (N taken from the last processed
# row); the class goes in a final 'label' column
columns = [f'column_{k}' for k in range(1, len(mean_values))] + ['label']
compressed_df = pd.DataFrame(compressed_data, columns=columns)

# Persist the table for the prediction cell
compressed_df.to_csv('compressed_data.csv', index=False)

In [18]:
import numpy as np
from sklearn.neighbors import NearestNeighbors
import zipfile

# Classify every recording in the validation folder with a k-nearest-neighbour
# majority vote over the compressed training features, then write the
# predictions to a CSV file and zip it.

# Load the compressed training features (the last column is the class label)
compressed_data_path = 'compressed_data.csv'  # Replace with the actual path
compressed_df = pd.read_csv(compressed_data_path)

# Fit the neighbour search once: the training data is the same for every test
# file, so refitting inside the loop only repeats identical work. A plain
# array is used for fitting because the queries below are plain arrays too
# (fitting on a DataFrame makes scikit-learn warn about missing feature names).
train_features = compressed_df.iloc[:, :-1].to_numpy()
train_labels = compressed_df.iloc[:, -1]
knn = NearestNeighbors(n_neighbors=32, metric='euclidean')
knn.fit(train_features)

# Collect the recordings to classify. Only CSV files are taken so that stray
# entries (e.g. a .ipynb_checkpoints directory) are skipped, and the names are
# sorted because os.listdir returns entries in arbitrary order.
test_folder_path = 'validation'  # Replace with the actual path
test_files = sorted(
    name for name in os.listdir(test_folder_path)
    if name.lower().endswith('.csv')
)

# Initialize list to store predictions
predictions = []

for test_file in test_files:
    test_file_path = os.path.join(test_folder_path, test_file)
    df_test = pd.read_csv(test_file_path)

    # Remove the time column and standardise each remaining column; columns
    # with zero spread are divided by 1 instead of by 0
    df_test = df_test.drop(columns=['Time'])
    std_values = df_test.std()
    std_values[std_values == 0] = 1
    df_test = (df_test - df_test.mean()) / std_values

    # Same feature as used for the training table: per-column mean.
    # NOTE(review): the columns were just mean-centred, so these means are
    # mathematically zero (floating-point residue only) - confirm intended.
    mean_values_test = df_test.mean().values.reshape(1, -1)

    # Find the k nearest training rows and look up their labels
    _, indices = knn.kneighbors(mean_values_test)
    neighbor_labels = train_labels.iloc[indices[0]]

    # Majority vote; on a tie, mode() lists the tied labels in sorted order
    # and the first one is taken
    emotion_label = neighbor_labels.mode()[0]

    predictions.append({'filename': test_file, 'predicted_class': emotion_label})

# Explicit columns keep the CSV header even if no test file was found
submission_df = pd.DataFrame(predictions, columns=['filename', 'predicted_class'])

# Write to CSV file
predictions_filename = 'predictions.csv'
submission_df.to_csv(predictions_filename, index=False)

# Zip the predictions file
zip_filename = 'predictions.zip'
with zipfile.ZipFile(zip_filename, 'w') as zipf:
    zipf.write(predictions_filename, arcname=os.path.basename(predictions_filename))

print(f'{zip_filename} created successfully.')




predictions.zip created successfully.


# Test Phase

### Folder Structure (for Test Phase)

- **train folder:** This folder contains 360 input CSV files used for training.

- **validation folder:** This folder contains 180 input CSV files used for validation.

- **train_labels.csv:** This CSV file contains the labels corresponding to the training data in the train folder.

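- **validation_labels.csv:** This CSV file contains the labels corresponding to the validation data in the validation folder.

- **test folder:** This folder contains the input CSV files to be classified in the test phase.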

- **main.ipynb:** This Jupyter Notebook contains the main code for the test phase.

## Run below cells for test phase

In [19]:
import pandas as pd
import os

# Test phase, step 1: turn every labelled training recording into one row of
# per-column means (computed after standardisation) plus its class label.
train_index = pd.read_csv('train_labels.csv')

compressed_data = []
for filename, label in zip(train_index['filename'], train_index['class']):
    # Read the recording from the train folder, without its time axis
    recording = pd.read_csv(os.path.join('train', filename))
    recording = recording.drop(columns=['Time'])

    # Per-column spread; a zero spread is replaced by 1 so the division
    # below never divides by zero
    scale = recording.std()
    scale = scale.mask(scale == 0, 1)

    # Z-score every column
    zscored = (recording - recording.mean()) / scale

    # NOTE(review): each column was just mean-centred, so its mean is
    # mathematically zero and the stored features are only floating-point
    # rounding residue. Confirm this is the intended feature.
    mean_values = zscored.mean().tolist() + [label]
    compressed_data.append(mean_values)

# Feature columns are column_1..column_N (N taken from the last processed
# row), followed by the 'label' column
columns = [f'column_{k}' for k in range(1, len(mean_values))] + ['label']
compressed_df = pd.DataFrame(compressed_data, columns=columns)

# Save the training feature table
compressed_df.to_csv('compressed_train_data.csv', index=False)

In [20]:
import pandas as pd
import os

# Test phase, step 2: build the same feature table for the labelled
# validation recordings (one row of per-column means plus the class label).
validation_index = pd.read_csv('validation_labels.csv')

compressed_data = []
for filename, label in zip(validation_index['filename'], validation_index['class']):
    # Read the recording from the validation folder, without its time axis
    recording = pd.read_csv(os.path.join('validation', filename))
    recording = recording.drop(columns=['Time'])

    # Per-column spread; a zero spread is replaced by 1 so the division
    # below never divides by zero
    scale = recording.std()
    scale = scale.mask(scale == 0, 1)

    # Z-score every column
    zscored = (recording - recording.mean()) / scale

    # NOTE(review): each column was just mean-centred, so its mean is
    # mathematically zero and the stored features are only floating-point
    # rounding residue. Confirm this is the intended feature.
    mean_values = zscored.mean().tolist() + [label]
    compressed_data.append(mean_values)

# Feature columns are column_1..column_N (N taken from the last processed
# row), followed by the 'label' column
columns = [f'column_{k}' for k in range(1, len(mean_values))] + ['label']
compressed_df = pd.DataFrame(compressed_data, columns=columns)

# Save the validation feature table
compressed_df.to_csv('compressed_validation_data.csv', index=False)

In [21]:
import numpy as np
from sklearn.neighbors import NearestNeighbors
import zipfile

# Test phase, step 3: merge the train and validation feature tables, classify
# every recording in the test folder with a k-nearest-neighbour majority vote,
# then write the predictions to a CSV file and zip it.

# Load the compressed training features
compressed_train_data_path = 'compressed_train_data.csv'
compressed_train_df = pd.read_csv(compressed_train_data_path)

# Load the compressed validation features
compressed_validation_data_path = 'compressed_validation_data.csv'
compressed_validation_df = pd.read_csv(compressed_validation_data_path)

# Concatenate compressed_train_df and compressed_validation_df vertically
compressed_df = pd.concat([compressed_train_df, compressed_validation_df], ignore_index=True)

print("Shape of compressed_train_df:", compressed_train_df.shape)
print("Shape of compressed_validation_df:", compressed_validation_df.shape)

# Verify the shape of the new dataframe
print("Shape of compressed_df:", compressed_df.shape)
compressed_df.to_csv('compressed_train_validation_data.csv', index=False)

# Fit the neighbour search once: the reference data is the same for every
# test file, so refitting inside the loop only repeats identical work. A plain
# array is used for fitting because the queries below are plain arrays too
# (fitting on a DataFrame makes scikit-learn warn about missing feature names).
reference_features = compressed_df.iloc[:, :-1].to_numpy()
reference_labels = compressed_df.iloc[:, -1]
knn = NearestNeighbors(n_neighbors=90, metric='euclidean')
knn.fit(reference_features)

# Collect the recordings to classify. Only CSV files are taken so that stray
# entries (e.g. a .ipynb_checkpoints directory) are skipped, and the names are
# sorted because os.listdir returns entries in arbitrary order.
test_folder_path = 'test'  # Replace with the actual path
test_files = sorted(
    name for name in os.listdir(test_folder_path)
    if name.lower().endswith('.csv')
)

# Initialize list to store predictions
predictions = []

for test_file in test_files:
    test_file_path = os.path.join(test_folder_path, test_file)
    df_test = pd.read_csv(test_file_path)

    # Remove the time column and standardise each remaining column; columns
    # with zero spread are divided by 1 instead of by 0
    df_test = df_test.drop(columns=['Time'])
    std_values = df_test.std()
    std_values[std_values == 0] = 1
    df_test = (df_test - df_test.mean()) / std_values

    # Same feature as used for the reference tables: per-column mean.
    # NOTE(review): the columns were just mean-centred, so these means are
    # mathematically zero (floating-point residue only) - confirm intended.
    mean_values_test = df_test.mean().values.reshape(1, -1)

    # Find the k nearest reference rows and look up their labels
    _, indices = knn.kneighbors(mean_values_test)
    neighbor_labels = reference_labels.iloc[indices[0]]

    # Majority vote; on a tie, mode() lists the tied labels in sorted order
    # and the first one is taken
    emotion_label = neighbor_labels.mode()[0]

    predictions.append({'filename': test_file, 'predicted_class': emotion_label})

# Explicit columns keep the CSV header even if no test file was found
submission_df = pd.DataFrame(predictions, columns=['filename', 'predicted_class'])

# Define the output name in this cell: it was previously only set in the
# development-phase cell, so zipping failed with a NameError when the test
# phase was run on its own.
predictions_filename = 'predictions.csv'
submission_df.to_csv(predictions_filename, index=False)

# Zip the predictions file
zip_filename = 'predictions.zip'
with zipfile.ZipFile(zip_filename, 'w') as zipf:
    zipf.write(predictions_filename, arcname=os.path.basename(predictions_filename))

print(f'{zip_filename} created successfully.')

Shape of compressed_train_df: (360, 25)
Shape of compressed_validation_df: (180, 25)
Shape of compressed_df: (540, 25)




predictions.zip created successfully.




# Thank You !!