In [None]:
from google.colab import drive

# Mount Google Drive into the Colab filesystem at the location named below.
DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive


In [None]:
import glob
import os
import re

import numpy as np
import pandas as pd

# Dataset locations on the mounted Google Drive.
# Update DATA_DIR if the project folder lives somewhere else.
DATA_DIR = '/content/drive/MyDrive/ML/project/SEECS_Data'
excel_file_path = f'{DATA_DIR}/Data.xlsx'
X_labels_path = f'{DATA_DIR}/X_labels/*.lab'

# Read the Excel sheet without a header row so every cell is treated as data
df = pd.read_excel(excel_file_path, header=None)

# Raw cell values as a NumPy array
data = df.values

# Target layout of the feature matrix: 15 rows x 512 columns
desired_shape = (15, 512)

# Validate BEFORE reshaping and fail loudly. Merely printing a message here
# would leave X_reshaped_data undefined and surface later as a confusing
# NameError when the features are combined with the labels.
if data.shape[1] % desired_shape[1] != 0:
    raise ValueError(
        "Number of columns in the Excel data is not divisible by 512. Adjust the data or desired shape."
    )

expected_size = desired_shape[0] * desired_shape[1]
if data.size != expected_size:
    raise ValueError(
        f"Excel data has shape {data.shape} ({data.size} values); "
        f"cannot reshape to {desired_shape} ({expected_size} values)."
    )

# order='F' makes NumPy read and place the elements in column-major order
X_reshaped_data = np.reshape(data, desired_shape, order='F')
print("Shape of reshaped data:", X_reshaped_data.shape)

def _natural_sort_key(path):
    """Return a sort key that orders digit runs in the file name numerically.

    With this key '2.lab' sorts before '10.lab', which plain string sorting
    would get wrong.
    """
    tokens = re.split(r'(\d+)', os.path.basename(path))
    # re.split with a capturing group alternates text / digits / text / ...,
    # so odd positions are always the digit runs.
    return [int(token) if index % 2 else token for index, token in enumerate(tokens)]


# glob.glob returns matches in arbitrary (filesystem-dependent) order, so sort
# them to make the label order reproducible between runs.
# NOTE(review): this assumes the file-name order corresponds to the row order
# of the feature matrix -- confirm against how the .lab files are named.
X_labels_file_paths = sorted(glob.glob(X_labels_path), key=_natural_sort_key)

if not X_labels_file_paths:
    raise FileNotFoundError(f"No label files found matching {X_labels_path!r}")

# Mapping from label words to numerical classes.
# "disike" is the spelling the original mapping used (presumably what the
# label files contain -- verify); the correct spelling is accepted as well.
label_mapping = {"disike": 0, "dislike": 0, "like": 1}

# Read each label file and convert its word to the numerical class
X_labels = []
for X_labels_file_path in X_labels_file_paths:
    with open(X_labels_file_path, 'r') as X_label_file:
        X_label_word = X_label_file.read().strip().lower()

    label_value = label_mapping.get(X_label_word)
    if label_value is None:
        # Silently skipping a file would shift every following label onto the
        # wrong feature row, so stop with a message naming the offending file.
        raise ValueError(
            f"Unrecognised label {X_label_word!r} in {X_labels_file_path!r}; "
            f"expected one of {sorted(label_mapping)}"
        )
    X_labels.append(label_value)

# Column vector of labels, one row per label file
X_labels = np.array(X_labels).reshape(-1, 1)

# Display the shape of the final labels
print("Shape of X_labels:", X_labels.shape)

# Each feature row needs exactly one label; check explicitly so a mismatch is
# reported clearly instead of as an opaque np.hstack dimension error.
if X_labels.shape[0] != X_reshaped_data.shape[0]:
    raise ValueError(
        f"Found {X_labels.shape[0]} labels for {X_reshaped_data.shape[0]} feature rows; "
        "the counts must match."
    )

# Append the label column to the right of the reshaped feature matrix
X_combined_data = np.hstack((X_reshaped_data, X_labels))

# Display the shape and contents of the combined data
print("Shape of X_combined data:", X_combined_data.shape)
print(X_combined_data)


Shape of reshaped data: (15, 512)
Shape of X_labels: (15, 1)
Shape of X_combined data: (15, 513)
[[33 32 32 ... 54 57  1]
 [33 32 32 ... 87 88  1]
 [33 32 32 ... 81 81  1]
 ...
 [32 32 32 ... 60 33  0]
 [32 32 32 ... 39 41  0]
 [32 32 32 ... 24 43  0]]


In [None]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

# The last column of X_combined_data holds the label; everything before it
# is the feature vector.
X = X_combined_data[:, :-1]  # Features
y = X_combined_data[:, -1]   # Labels

# Split settings
TEST_FRACTION = 0.2
RANDOM_SEED = 42

# Hold out part of the data for testing. stratify=y keeps the class
# proportions the same in both splits, which matters on a dataset this small:
# an unstratified draw can easily produce a test split with a single class.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=TEST_FRACTION, random_state=RANDOM_SEED, stratify=y
)

# Create a linear-kernel SVM classifier
svm_classifier = SVC(kernel='linear', C=1.0)

# Train the classifier on the training split only
svm_classifier.fit(X_train, y_train)

# Predict the held-out test split
y_pred = svm_classifier.predict(X_test)

# Accuracy on the held-out test split
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)



# Re-score the trained classifier on the COMPLETE dataset.
# This is not unseen data: X_combined_data contains the rows the classifier
# was fitted on, so this figure is optimistic and must not be read as
# generalisation performance. The *_new variable names are kept because the
# following cells refer to them.
X_new = X_combined_data[:, :-1]  # Features
y_new = X_combined_data[:, -1]   # Labels

# Predict every row of the dataset
y_new_pred = svm_classifier.predict(X_new)

# Accuracy over all rows (training + test)
accuracy_new = accuracy_score(y_new, y_new_pred)
print("Accuracy on full dataset (includes training samples):", accuracy_new)
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report
from sklearn.metrics import accuracy_score, confusion_matrix
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import learning_curve
# Confusion matrix of the full-dataset predictions (y_new / y_new_pred).
# Passing labels explicitly guarantees a 2x2 matrix whose rows/columns line up
# with the two tick labels even if one class never occurs.
CLASS_IDS = [0, 1]
CLASS_NAMES = ['Dislike', 'Like']
cm = confusion_matrix(y_new, y_new_pred, labels=CLASS_IDS)

fig, ax = plt.subplots(figsize=(6, 4))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=CLASS_NAMES, yticklabels=CLASS_NAMES, ax=ax)
ax.set_title('Confusion Matrix')
ax.set_xlabel('Predicted Label')
ax.set_ylabel('True Label')
plt.show()

# Accuracy over the same full-dataset predictions. The model being evaluated
# is the SVM classifier, not a logistic regression.
# NOTE: this rebinds `accuracy`, which previously held the test-split score.
accuracy = accuracy_score(y_new, y_new_pred)
print("Accuracy of the SVM model (full dataset):", accuracy)

# Per-class precision / recall / F1 with readable class names
report = classification_report(y_new, y_new_pred, labels=CLASS_IDS, target_names=CLASS_NAMES)
print("Classification Report:")
print(report)

# Summary figure with three panels. The panels do NOT all describe the same
# samples: the confusion matrix was computed from the full-dataset predictions,
# while the two bar charts use the held-out test split. Each title states
# which sample set it shows so the figure is not misread.
fig = plt.figure(figsize=(12, 8))

# Confusion Matrix Plot
ax_cm = fig.add_subplot(2, 2, 1)
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=['Dislike', 'Like'], yticklabels=['Dislike', 'Like'], ax=ax_cm)
ax_cm.set_title('Confusion Matrix (full dataset)')
ax_cm.set_xlabel('Predicted Label')
ax_cm.set_ylabel('True Label')

# Distribution of Predicted Classes (count each class once, reuse the result)
pred_classes, pred_counts = np.unique(y_pred, return_counts=True)
ax_pred = fig.add_subplot(2, 2, 2)
sns.barplot(x=pred_classes, y=pred_counts, ax=ax_pred)
ax_pred.set_title('Distribution of Predicted Classes (test split)')
ax_pred.set_xlabel('Predicted Class')
ax_pred.set_ylabel('Count')

# Distribution of True Classes
true_classes, true_counts = np.unique(y_test, return_counts=True)
ax_true = fig.add_subplot(2, 2, 3)
sns.barplot(x=true_classes, y=true_counts, ax=ax_true)
ax_true.set_title('Distribution of True Classes (test split)')
ax_true.set_xlabel('True Class')
ax_true.set_ylabel('Count')

fig.tight_layout()
plt.show()
