# **CSCE 5218 / CSCE 4930 Deep Learning**

# **HW1a The Perceptron** (20 pt)


In [2]:
# Get the datasets
def print_file_head(file_path, lines=5):
    """Reads and prints the first few lines of a text file."""
    try:
        with open(file_path, "r", encoding="utf-8") as file:
            head = [next(file) for _ in range(lines)]
            print(f"Head of {file_path}:\n{''.join(head)}\n")
    except FileNotFoundError:
        print(f"Error: {file_path} not found.")
    except StopIteration:
        print(f"Error: {file_path} is empty.")

# File paths
test_small_path = r"C:\Users\mukth\Downloads\test_small (1).txt"
train_path = r"C:\Users\mukth\Downloads\train (1).txt"

In [3]:
# Take a peek at the datasets
print_file_head(test_small_path)
print_file_head(train_path)

Head of C:\Users\mukth\Downloads\test_small (1).txt:
X1	X2	X3
1	1	1	1
0	0	1	1
0	1	1	0
0	1	1	0


Head of C:\Users\mukth\Downloads\train (1).txt:
A1	A2	A3	A4	A5	A6	A7	A8	A9	A10	A11	A12	A13	
1	1	0	0	0	0	0	0	1	1	0	0	1	0
0	0	1	1	0	1	1	0	0	0	0	0	1	0
0	1	0	1	1	0	1	0	1	1	1	0	1	1
0	0	1	0	0	1	0	1	0	1	1	1	1	0




### Build the Perceptron Model

You will need to complete some of the function definitions below.  DO NOT import any other libraries to complete this. 

In [5]:
import math
import itertools
import re


# Corpus reader, all columns but the last one are coordinates;
#   the last column is the label
def read_data(file_name):
    f = open(file_name, 'r')

    data = []
    # Discard header line
    f.readline()
    for instance in f.readlines():
        if not re.search('\t', instance): continue
        instance = list(map(int, instance.strip().split('\t')))
        # Add a dummy input so that w0 becomes the bias
        instance = [-1] + instance
        data += [instance]
    return data


def dot_product(array1, array2):
    #TODO: Return dot product of array 1 and array 
    return sum(a * b for a, b in zip(array1, array2))
    return NotImplemented 


def sigmoid(x):
    #TODO: Return outpout of sigmoid function on x
    return 1 / (1 + math.exp(-x))
    return NotImplemented

# The output of the model, which for the perceptron is 
# the sigmoid function applied to the dot product of 
# the instance and the weights
def output(weight, instance):
    #TODO: return the output of the model 
    return sigmoid(dot_product(weights, instance))
    return NotImplemented

# Predict the label of an instance; this is the definition of the perceptron
# you should output 1 if the output is >= 0.5 else output 0
def predict(weights, instance):
    #TODO: return the prediction of the model
    return 1 if output(weights, instance) >= 0.5 else 0


# Accuracy = percent of correct predictions
def get_accuracy(weights, instances):
    # You do not to write code like this, but get used to it
    correct = sum([1 if predict(weights, instance) == instance[-1] else 0
                   for instance in instances])
    return correct * 100 / len(instances)


# Train a perceptron with instances and hyperparameters:
#       lr (learning rate) 
#       epochs
# The implementation comes from the definition of the perceptron
#
# Training consists on fitting the parameters which are the weights
# that's the only thing training is responsible to fit
# (recall that w0 is the bias, and w1..wn are the weights for each coordinate)
#
# Hyperparameters (lr and epochs) are given to the training algorithm
# We are updating weights in the opposite direction of the gradient of the error,
# so with a "decent" lr we are guaranteed to reduce the error after each iteration.
def train_perceptron(instances, lr, epochs):
    """Train a perceptron using the given instances, learning rate (lr), and number of epochs."""
    weights = [0] * (len(instances[0]) - 1)  # Initialize weights (one less than the number of columns)

    for _ in range(epochs):
        for instance in instances:
            in_value = dot_product(weights, instance)  # Compute the dot product
            out_value = sigmoid(in_value)  # Apply sigmoid to get output
            error = instance[-1] - out_value  # Compute error (label - predicted value)

            # Update weights using the gradient descent rule
            for i in range(len(weights)):
                weights[i] += lr * error * out_value * (1 - out_value) * instance[i]  # Weight update rule

    return weights

## Run it

In [7]:
instances_tr = read_data(r"C:\Users\mukth\Downloads\train (1).txt")
instances_te = read_data(r"C:\Users\mukth\Downloads\test_small (1).txt")
lr = 0.005
epochs = 5
weights = train_perceptron(instances_tr, lr, epochs)
accuracy = get_accuracy(weights, instances_te)
print(f"#tr: {len(instances_tr):3}, epochs: {epochs:3}, learning rate: {lr:.3f}; "
      f"Accuracy (test, {len(instances_te)} instances): {accuracy:.1f}")

#tr: 400, epochs:   5, learning rate: 0.005; Accuracy (test, 14 instances): 71.4


## Questions

Answer the following questions. Include your implementation and the output for each question.



### Question 1
 
In `train_perceptron(instances, lr, epochs)`, we have the follosing code:
```
in_value = dot_product(weights, instance)
output = sigmoid(in_value)
error = instance[-1] - output
```

Why don't we have the following code snippet instead?
```
output = predict(weights, instance)
error = instance[-1] - output
```

#### TODO Add your answer here (text only)
The dot product calculates the weighted sum of inputs.
The sigmoid function is applied to generate a smooth, continuous output between 0 and 1.
The error is determined from this output, allowing for proper gradient-based weight updates.
The predict() function typically applies a threshold (e.g., returns 1 if output ≥ 0.5, else 0).
This would make the output discrete (0 or 1), which is not suitable for training.
Gradient-based learning requires a continuous output (from sigmoid) in order to compute meaningful updates.
A discrete output would have zero gradients almost everywhere, and learning would not be possible.
We use sigmoid(dot_product(.)) while training in order to maintain smooth learning, whereas predict() is used only for ultimate classification after training is complete.




### Question 2
Train the perceptron with the following hyperparameters and calculate the accuracy with the test dataset.

```
tr_percent = [5, 10, 25, 50, 75, 100] # percent of the training dataset to train with
num_epochs = [5, 10, 20, 50, 100]              # number of epochs
lr = [0.005, 0.01, 0.05]              # learning rate
```

TODO: Write your code below and include the output at the end of each training loop (NOT AFTER EACH EPOCH)
of your code.The output should look like the following:
```
# tr:  20, epochs:   5, learning rate: 0.005; Accuracy (test, 100 instances): 68.0
# tr:  20, epochs:  10, learning rate: 0.005; Accuracy (test, 100 instances): 68.0
# tr:  20, epochs:  20, learning rate: 0.005; Accuracy (test, 100 instances): 68.0
[and so on for all the combinations]
```
You will get different results with different hyperparameters.

#### TODO Add your answer here (code and output in the format above) 


In [11]:
import random

def train_and_evaluate(train_data, test_data, tr_percent, num_epochs, lr):
    """Trains the perceptron on different training sizes and evaluates it."""
    train_size = int(len(train_data) * (tr_percent / 100))
    train_subset = random.sample(train_data, train_size)  # Select a subset of training data
    
    weights = train_perceptron(train_subset, lr, num_epochs)  # Train the perceptron
    accuracy = get_accuracy(weights, test_data)  # Compute accuracy on test set
    
    print(f"# tr: {train_size}, epochs: {num_epochs:3}, learning rate: {lr:.3f}; Accuracy (test, {len(test_data)} instances): {accuracy:.1f}")

# Load datasets
train_data = read_data(r"C:\Users\mukth\Downloads\train (1).txt")  # Assuming training data is in train.txt
test_data = read_data(r"C:\Users\mukth\Downloads\test (1).txt")    # Assuming test data is in test.txt

# Define hyperparameter values
tr_percentages = [5, 10, 25, 50, 75, 100]
num_epochs_list = [5, 10, 20, 50, 100]
lr_values = [0.005, 0.01, 0.05]

# Run training and evaluation for each combination of hyperparameters
for tr_percent in tr_percentages:
    for num_epochs in num_epochs_list:
        for lr in lr_values:
            train_and_evaluate(train_data, test_data, tr_percent, num_epochs, lr)

# tr: 20, epochs:   5, learning rate: 0.005; Accuracy (test, 100 instances): 68.0
# tr: 20, epochs:   5, learning rate: 0.010; Accuracy (test, 100 instances): 68.0
# tr: 20, epochs:   5, learning rate: 0.050; Accuracy (test, 100 instances): 68.0
# tr: 20, epochs:  10, learning rate: 0.005; Accuracy (test, 100 instances): 68.0
# tr: 20, epochs:  10, learning rate: 0.010; Accuracy (test, 100 instances): 68.0
# tr: 20, epochs:  10, learning rate: 0.050; Accuracy (test, 100 instances): 68.0
# tr: 20, epochs:  20, learning rate: 0.005; Accuracy (test, 100 instances): 68.0
# tr: 20, epochs:  20, learning rate: 0.010; Accuracy (test, 100 instances): 68.0
# tr: 20, epochs:  20, learning rate: 0.050; Accuracy (test, 100 instances): 68.0
# tr: 20, epochs:  50, learning rate: 0.005; Accuracy (test, 100 instances): 68.0
# tr: 20, epochs:  50, learning rate: 0.010; Accuracy (test, 100 instances): 68.0
# tr: 20, epochs:  50, learning rate: 0.050; Accuracy (test, 100 instances): 68.0
# tr: 20, epochs

In [12]:
instances_tr = read_data(r"C:\Users\mukth\Downloads\train (1).txt")
instances_te = read_data(r"C:\Users\mukth\Downloads\test_small (1).txt")
tr_percent = [5, 10, 25, 50, 75, 100] # percent of the training dataset to train with
num_epochs = [5, 10, 20, 50, 100]     # number of epochs
lr_array = [0.005, 0.01, 0.05]        # learning rate

for lr in lr_array:
  for tr_size in tr_percent:
    for epochs in num_epochs:
      size =  round(len(instances_tr)*tr_size/100)
      pre_instances = instances_tr[0:size]
      weights = train_perceptron(pre_instances, lr, epochs)
      accuracy = get_accuracy(weights, instances_te)
    print(f"#tr: {len(pre_instances):0}, epochs: {epochs:3}, learning rate: {lr:.3f}; "
            f"Accuracy (test, {len(instances_te)} instances): {accuracy:.1f}")

#tr: 20, epochs: 100, learning rate: 0.005; Accuracy (test, 14 instances): 85.7
#tr: 40, epochs: 100, learning rate: 0.005; Accuracy (test, 14 instances): 71.4
#tr: 100, epochs: 100, learning rate: 0.005; Accuracy (test, 14 instances): 71.4
#tr: 200, epochs: 100, learning rate: 0.005; Accuracy (test, 14 instances): 85.7
#tr: 300, epochs: 100, learning rate: 0.005; Accuracy (test, 14 instances): 85.7
#tr: 400, epochs: 100, learning rate: 0.005; Accuracy (test, 14 instances): 71.4
#tr: 20, epochs: 100, learning rate: 0.010; Accuracy (test, 14 instances): 42.9
#tr: 40, epochs: 100, learning rate: 0.010; Accuracy (test, 14 instances): 85.7
#tr: 100, epochs: 100, learning rate: 0.010; Accuracy (test, 14 instances): 28.6
#tr: 200, epochs: 100, learning rate: 0.010; Accuracy (test, 14 instances): 85.7
#tr: 300, epochs: 100, learning rate: 0.010; Accuracy (test, 14 instances): 85.7
#tr: 400, epochs: 100, learning rate: 0.010; Accuracy (test, 14 instances): 71.4
#tr: 20, epochs: 100, learning r

### Question 3
Write a couple paragraphs interpreting the results with all the combinations of hyperparameters. Drawing a plot will probably help you make a point. In particular, answer the following:
- A. Do you need to train with all the training dataset to get the highest accuracy with the test dataset?
- B. How do you justify that training the second run obtains worse accuracy than the first one (despite the second one uses more training data)?
   ```
#tr: 100, epochs:  20, learning rate: 0.050; Accuracy (test, 100 instances): 71.0
#tr: 200, epochs:  20, learning rate: 0.005; Accuracy (test, 100 instances): 68.0
```
- C. Can you get higher accuracy with additional hyperparameters (higher than `80.0`)?
- D. Is it always worth training for more epochs (while keeping all other hyperparameters fixed)?

#### TODO: Add your answer here (code and text)



In [36]:
import random
import matplotlib.pyplot as plt

# Load datasets
train_data = read_data(r"C:\Users\mukth\Downloads\train (1).txt")  # Assuming training data is in train.txt
test_data = read_data(r"C:\Users\mukth\Downloads\test (1).txt")   

train_data
def train_and_evaluate(train_data, test_data, tr_percent, num_epochs, lr):
    """Trains the perceptron on different training sizes and evaluates it."""
    train_size = int(len(train_data) * (tr_percent / 100))
    train_subset = random.sample(train_data, train_size)  # Select a subset of training data
    
    
    weights = train_perceptron(train_subset, lr, num_epochs)  # Train the perceptron
    accuracy = get_accuracy(weights, test_data)  # Compute accuracy on test set
    
    print(f"# tr: {train_size}, epochs: {num_epochs:3}, learning rate: {lr:.3f}; Accuracy (test, {len(test_data)} instances): {accuracy:.1f}")
    return train_size, num_epochs, lr, accuracy

 # Assuming test data is in test.txt

# Define hyperparameter values
tr_percentages = [5, 10, 25, 50, 75, 100]
num_epochs_list = [5, 10, 20, 50, 100]
lr_values = [0.005, 0.01, 0.05]

# Store results for plotting
results = []
# Run training and evaluation for each combination of hyperparameters
for lr in lr_values:
  for tr_size in tr_percentages:
    for epochs in num_epochs_list:
      size =  round(len(instances_tr)*tr_size/100)
      pre_instances = instances_tr[0:size]
      weights = train_perceptron(pre_instances, lr, epochs)
      accuracy = get_accuracy(weights, instances_te)
    print(f"#tr: {len(pre_instances):0}, epochs: {epochs:3}, learning rate: {lr:.3f}; "
            f"Accuracy (test, {len(instances_te)} instances): {accuracy:.1f}")

# Ensure plt is defined before plotting
import matplotlib.pyplot as plt




#tr: 20, epochs: 100, learning rate: 0.005; Accuracy (test, 14 instances): 85.7
#tr: 40, epochs: 100, learning rate: 0.005; Accuracy (test, 14 instances): 71.4
#tr: 100, epochs: 100, learning rate: 0.005; Accuracy (test, 14 instances): 71.4
#tr: 200, epochs: 100, learning rate: 0.005; Accuracy (test, 14 instances): 85.7
#tr: 300, epochs: 100, learning rate: 0.005; Accuracy (test, 14 instances): 85.7
#tr: 400, epochs: 100, learning rate: 0.005; Accuracy (test, 14 instances): 71.4
#tr: 20, epochs: 100, learning rate: 0.010; Accuracy (test, 14 instances): 42.9
#tr: 40, epochs: 100, learning rate: 0.010; Accuracy (test, 14 instances): 85.7
#tr: 100, epochs: 100, learning rate: 0.010; Accuracy (test, 14 instances): 28.6
#tr: 200, epochs: 100, learning rate: 0.010; Accuracy (test, 14 instances): 85.7
#tr: 300, epochs: 100, learning rate: 0.010; Accuracy (test, 14 instances): 85.7
#tr: 400, epochs: 100, learning rate: 0.010; Accuracy (test, 14 instances): 71.4
#tr: 20, epochs: 100, learning r