In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt

# Read the dataset from disk.
data = pd.read_csv("HW1.csv")

# Column 0 is the regression target; every remaining column is an input feature.
y = data.iloc[:, 0].values
X = data.iloc[:, 1:].values

# Hold out a fixed number of rows (5818) for testing; the fixed seed keeps
# the split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=5818,
    random_state=42,
)

# Standardize the features. The scaler's statistics are estimated on the
# training split only and then reused for the test split, so no information
# from the test rows leaks into preprocessing.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_normalized = scaler.transform(X_train)
X_test_normalized = scaler.transform(X_test)

# Define the DNN model
class DNN(nn.Module):
    """Fully connected regression network with three ReLU hidden layers.

    Layer widths are ``in_features -> 1000 -> 250 -> 100 -> 1``. The output
    layer has no activation, so the network emits one unbounded value per
    input row.

    Args:
        in_features: Number of input features per sample. Defaults to 11,
            the width that was previously hard-coded, so ``DNN()`` builds
            the same architecture as before.
    """

    def __init__(self, in_features: int = 11):
        super().__init__()
        # Attribute names are part of the state_dict keys; keep them stable.
        self.fc1 = nn.Linear(in_features, 1000)
        self.fc2 = nn.Linear(1000, 250)
        self.fc3 = nn.Linear(250, 100)
        self.out = nn.Linear(100, 1)

    def forward(self, x):
        """Map a ``(..., in_features)`` tensor to predictions of shape ``(..., 1)``."""
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))
        # No activation on the last layer: this is a regression output.
        return self.out(x)

# Instantiate the network.
model = DNN()

# Optimization setup: Adam with a learning rate of 0.001, minimizing the
# mean squared error between predictions and targets.
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
criterion = nn.MSELoss()

# Convert the training data to float32 tensors. The targets are reshaped
# into a single column so they line up with the model's one-column output.
y_train_tensor = torch.tensor(y_train.reshape(-1, 1), dtype=torch.float32)
X_train_tensor = torch.tensor(X_train_normalized, dtype=torch.float32)

# Full-batch training: every epoch performs exactly one gradient step
# computed over the entire training set.
epochs = 20
model.train()  # nothing below leaves training mode, so setting it once suffices
for epoch in range(epochs):
    optimizer.zero_grad()  # clear gradients left over from the previous step
    outputs = model(X_train_tensor)
    loss = criterion(outputs, y_train_tensor)
    loss.backward()
    optimizer.step()
    print(f"Epoch [{epoch+1}/{epochs}], Loss: {loss.item()}")

# Evaluate the trained model with Mean Squared Error (MSE) on both splits.
from sklearn.metrics import mean_squared_error

# Run inference once per split. Evaluation mode plus torch.no_grad() means no
# autograd graph is built, which also allows the outputs to be converted
# straight to NumPy arrays.
model.eval()
with torch.no_grad():
    X_test_tensor = torch.tensor(X_test_normalized, dtype=torch.float32)
    y_pred = model(X_test_tensor).numpy()
    # Reuse the training tensor built for the training loop instead of
    # converting X_train_normalized a second time.
    y_train_pred = model(X_train_tensor).numpy()

# The test-set predictions were previously computed twice under two names;
# both names are kept (pointing at the same array) in case other code uses them.
y_test_pred = y_pred

# Training MSE
train_mse = mean_squared_error(y_train, y_train_pred)
print(f"Training MSE: {train_mse}")

# Testing MSE
test_mse = mean_squared_error(y_test, y_test_pred)
print(f"Testing MSE: {test_mse}")

# Training "accuracy", defined here as 1 minus the mean absolute relative
# error between predictions and targets.
#
# A target of 0 would cause a division by zero, so such targets are replaced
# by 1 (in both the numerator and the denominator, as before).
safe_targets = y_train.astype(float)  # astype copies, so y_train is left untouched
safe_targets[safe_targets == 0] = 1

# Flatten the (N, 1) prediction column so it aligns element-wise with the
# 1-D target vector; this also makes the result a scalar rather than a
# one-element array.
relative_errors = abs((y_train_pred.reshape(-1) - safe_targets) / safe_targets)
accuracy_sum = relative_errors.sum()

accuracy = 1 - (accuracy_sum / len(y_train))
print("Training Accuracy:", accuracy)

# Visualize the fit against the third input feature (danceability).
# Column index 2 is assumed to be danceability -- confirm against the CSV
# header. The values are taken from the normalized training matrix, so the
# x-axis is in normalized units rather than raw danceability.
danceability_train = X_train_normalized[:, 2]

# Recompute the training-set predictions in evaluation mode, without
# gradient tracking, so this section does not rely on earlier predictions.
model.eval()
with torch.no_grad():
    train_inputs = torch.tensor(X_train_normalized, dtype=torch.float32)
    y_train_pred = model(train_inputs).numpy()

# Overlay the actual targets (blue) and the model's predictions (red) on a
# single set of axes.
fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(danceability_train, y_train, color='blue', label='Actual')
ax.scatter(danceability_train, y_train_pred, color='red', label='Predicted')
ax.set_xlabel('Danceability')
ax.set_ylabel('Song Popularity')
ax.set_title('Fitting Curve of Danceability vs. Song Popularity (Training Data)')
ax.legend()
plt.show()