# **Basic Python**

## 1.a 
Write a function F that takes a single argument n and prints a sequence of n strings, as shown in the examples below: <br>
Ex: **n = 3**

--A--<br>
-BAB-<br>
CBABC<br>

**n = 4**

---A---<br>
--BAB--<br>
-CBABC-<br>
DCBABCD

In [2]:
def F(n):
    """Print an n-row, dash-padded letter pyramid.

    Row i (0-based) is padded with n - i - 1 dashes on each side of a
    palindrome that falls from the (i + 1)-th capital letter down to 'A'
    and climbs back up, so every row is 2 * n - 1 characters wide.

    Letters are produced with chr(ord('A') + k), so rows past the 26th
    continue into whatever characters follow 'Z'.
    """
    base = ord('A')
    for row in range(n):
        dashes = '-' * (n - row - 1)
        # Left half of the palindrome, e.g. "CB" for row 2; the right half is its mirror.
        falling = ''.join(chr(base + step) for step in range(row, 0, -1))
        print(dashes + falling + 'A' + falling[::-1] + dashes)

# Tests: print the pyramid for several sizes
for size in (3, 4, 10, 6):
    F(size)

--A--
-BAB-
CBABC
---A---
--BAB--
-CBABC-
DCBABCD
---------A---------
--------BAB--------
-------CBABC-------
------DCBABCD------
-----EDCBABCDE-----
----FEDCBABCDEF----
---GFEDCBABCDEFG---
--HGFEDCBABCDEFGH--
-IHGFEDCBABCDEFGHI-
JIHGFEDCBABCDEFGHIJ
-----A-----
----BAB----
---CBABC---
--DCBABCD--
-EDCBABCDE-
FEDCBABCDEF


## 1.b
Make a function F that takes only one argument, a dictionary(dict) d.
The keys of d are integers and the values of d are a tuple of type (x (int), y (int)).
You must print out the dict in the format "-key-, -x-, -y-" with each entry in a new line. Print it for each of the three sorted orders, by key values ascending, by x values descending, by y values ascending.

In [3]:
def F(d: dict):
    """Print the entries of d three times, once per sort order.

    d maps integer keys to (x, y) tuples. Each entry is printed as
    "-key-, -x-, -y-" on its own line, under a heading, in these orders:
    key ascending, x descending, y ascending. Sorting is stable, so entries
    with equal x (or equal y) keep the dict's insertion order.
    """
    # (heading, sort key) for each section; x is negated to get descending order.
    sections = (
        ("Sorted by key (ascending):", lambda entry: entry[0]),
        ("\nSorted by x (descending):", lambda entry: -entry[1][0]),
        ("\nSorted by y (ascending):", lambda entry: entry[1][1]),
    )
    for heading, sort_key in sections:
        print(heading)
        for key, pair in sorted(d.items(), key=sort_key):
            print(f"-{key}-, -{pair[0]}-, -{pair[1]}-")

# Tests: two sample dictionaries, including negative keys and values
for sample in (
    {1: (1, 2), 2: (-1, 4), 5: (-4, 3), 4: (2, 3)},
    {-8: (4, 2), 6: (-3, 4), 7: (2, 1), 5: (9, -10)},
):
    F(sample)


Sorted by key (ascending):
-1-, -1-, -2-
-2-, --1-, -4-
-4-, -2-, -3-
-5-, --4-, -3-

Sorted by x (descending):
-4-, -2-, -3-
-1-, -1-, -2-
-2-, --1-, -4-
-5-, --4-, -3-

Sorted by y (ascending):
-1-, -1-, -2-
-5-, --4-, -3-
-4-, -2-, -3-
-2-, --1-, -4-
Sorted by key (ascending):
--8-, -4-, -2-
-5-, -9-, --10-
-6-, --3-, -4-
-7-, -2-, -1-

Sorted by x (descending):
-5-, -9-, --10-
--8-, -4-, -2-
-7-, -2-, -1-
-6-, --3-, -4-

Sorted by y (ascending):
-5-, -9-, --10-
-7-, -2-, -1-
--8-, -4-, -2-
-6-, --3-, -4-


## 2 Working with Student Records

Use the data in **student_records.csv** to complete the given tasks. Do not include any external libraries. Use a Python dictionary if required.

### Reference
- [Python Dictionaries](https://www.w3schools.com/python/python_dictionaries.asp)


#### 2.a: open the student_records.csv file and print out the first 10 rows

In [None]:
def read_csv(filename):
    """Read a comma-separated text file into a list of rows (lists of strings).

    Each line is stripped of surrounding whitespace and split on ','.
    Blank lines are skipped: they would otherwise come back as [''] rows,
    which have no name or course columns for later steps to read.

    This is a plain split, so quoted fields containing commas are not
    supported.

    Args:
        filename: path of the file to read.

    Returns:
        A list with one list of field strings per non-blank line, in file
        order (the header line, if any, is the first row).
    """
    rows = []
    with open(filename, 'r') as f:
        for line in f:
            line = line.strip()
            if line:
                rows.append(line.split(','))
    return rows

# Load every row once; the later 2.x cells reuse `data`.
data = read_csv('student_records.csv')

# The slice is taken from the top of the file, so it includes the first line as well.
preview = data[:10]
print("First 10 rows:")
for record in preview:
    print(record)


#### 2.b: Print out the total credits and calculate the CPI of each student. 
CPI is the credit-weighted average of the grades in core courses and electives (letter grades are converted to numbers as AP, AA = 10, AB = 9, BB = 8, BC = 7, CC = 6)

In [None]:
def calculate_cpi(data):
    """Print each student's total credits and CPI (credit-weighted grade average).

    data[0] is skipped as the header row. For every other row, column 0 is
    printed as the student identifier and columns from index 3 onwards are
    read as course cells.

    NOTE(review): each course cell is assumed to be "name|credits|grade" —
    confirm against student_records.csv. The other 2.x functions in this
    file read the third field as a category tag instead.

    Args:
        data: list of rows (lists of strings) as returned by read_csv.

    Raises:
        ValueError: a non-empty course cell does not have exactly three
            '|'-separated parts, or its credits field is not an integer.
        KeyError: a grade is not one of AP, AA, AB, BB, BC, CC.
    """
    grade_map = {'AP': 10, 'AA': 10, 'AB': 9, 'BB': 8, 'BC': 7, 'CC': 6}
    for student in data[1:]:
        total_credits, total_points = 0, 0
        for course in student[3:]:  # assumes courses start from column 4 — TODO confirm
            if not course:
                # Empty cell (e.g. a row padded with trailing commas): nothing to count.
                continue
            _, credits, grade = course.split('|')
            credits = int(credits)
            total_credits += credits
            total_points += grade_map[grade] * credits
        # With no credits the average is undefined; report 0.0 instead of dividing by zero.
        cpi = total_points / total_credits if total_credits else 0.0
        print(f"Student {student[0]} total credits: {total_credits}")
        print(f"Student {student[0]} CPI: {cpi}")

# Uses the rows loaded into `data` by read_csv('student_records.csv') above.
calculate_cpi(data)


#### 2.c: Print out the names of all students who meet the graduation requirements 
At least 20 credits of core courses, 15 credits of department electives, 10 credits of flexible electives and 5 credits of hasmed electives

In [None]:
def filter_graduates(data):
    """Print a line for every student who meets all graduation credit minimums.

    data[0] is skipped as the header row. For every other row, columns from
    index 3 onwards are read as course cells and split into three
    '|'-separated parts; the second is the credit count and the third is
    matched exactly against the category names below.

    NOTE(review): the "name|credits|tag" cell layout and the tag spellings
    ('core', 'dept_elective', 'flexible', 'hasmed') are assumptions —
    confirm against student_records.csv.
    """
    required = {
        'core': 20,
        'dept_elective': 15,
        'flexible': 10,
        'hasmed': 5,
    }

    for record in data[1:]:
        # Credits earned so far in each required category.
        earned = dict.fromkeys(required, 0)
        for entry in record[3:]:
            _, credit_text, category = entry.split('|')
            if category in earned:
                earned[category] += int(credit_text)

        has_shortfall = any(earned[name] < minimum for name, minimum in required.items())
        if not has_shortfall:
            print(f"Student {record[0]} meets graduation requirements.")

# Call the function after loading `data` (the rows returned by read_csv in 2.a)
filter_graduates(data)


#### 2.d: Print out the names of all students who completed a minor 
At least 10 credits with the minor tag in a specific department

In [None]:
def filter_minors(data):
    """Print a line for each (student, department) pair with a completed minor.

    data[0] is skipped as the header row. For every other row, columns from
    index 3 onwards are split into three '|'-separated parts. Cells whose
    third part contains 'minor' have their credits added to a per-department
    total, where the department is the text after the first '_' in that
    part. A department with at least 10 credits is reported.

    NOTE(review): the "minor_<dept>" tag layout is an assumption — confirm
    against student_records.csv. A tag that contains 'minor' but has no '_'
    raises IndexError.
    """
    credits_needed = 10

    for record in data[1:]:
        per_department = {}
        for entry in record[3:]:
            _, credit_text, label = entry.split('|')
            if 'minor' not in label:
                continue
            department = label.split('_')[1]
            if department not in per_department:
                per_department[department] = 0
            per_department[department] += int(credit_text)

        completed = [name for name, total in per_department.items() if total >= credits_needed]
        for name in completed:
            print(f"Student {record[0]} completed a minor in {name}.")

# Call the function after loading `data` (the rows returned by read_csv in 2.a)
filter_minors(data)

#### 2.e: Print out the names of all students who completed honours
At least 10 credits with the honours tag and 20 core credits

In [None]:
def filter_honours(data):
    """Print a line for every student who satisfies the honours requirements.

    data[0] is skipped as the header row. For every other row, columns from
    index 3 onwards are split into three '|'-separated parts. Credits are
    added to the honours total when the third part contains 'honours' and to
    the core total when it contains 'core' (substring match, so one cell can
    count towards both). A student needs at least 10 honours credits and at
    least 20 core credits.

    NOTE(review): the "name|credits|tag" cell layout is an assumption —
    confirm against student_records.csv.
    """
    minimums = {'honours': 10, 'core': 20}

    for record in data[1:]:
        earned = {'honours': 0, 'core': 0}
        for entry in record[3:]:
            _, credit_text, label = entry.split('|')
            amount = int(credit_text)
            for marker in earned:
                if marker in label:
                    earned[marker] += amount

        if earned['honours'] < minimums['honours'] or earned['core'] < minimums['core']:
            continue
        print(f"Student {record[0]} completed honours requirements.")

# Call the function after loading `data` (the rows returned by read_csv in 2.a)
filter_honours(data)


# **SciPy** 

## Part A
Minimize the function $f(x, y) = 2(x - y - 3)^2 + 4(x + 2y + 1)^4$.<br>
With the constraints : $ x - y \ge -3, (x + 2)^2 + (y + 1)^2 \le 5$ <br>
Using scipy.optimize.minimize with constraints. (Hint: Look at the examples in the official documentation)

In [None]:
from scipy.optimize import minimize

# Objective: f(x, y) = 2(x - y - 3)^2 + 4(x + 2y + 1)^4, with x = (x, y) packed in one sequence
def func(x):
    first, second = x[0], x[1]
    squared_term = (first - second - 3)**2
    quartic_term = (first + 2 * second + 1)**4
    return 2 * squared_term + 4 * quartic_term

# Constraints, rewritten in the 'ineq' form g(x) >= 0:
#   x - y >= -3                 ->  x - y + 3 >= 0
#   (x + 2)^2 + (y + 1)^2 <= 5  ->  5 - ((x + 2)^2 + (y + 1)^2) >= 0
constraints = [
    dict(type='ineq', fun=lambda p: p[0] - p[1] + 3),
    dict(type='ineq', fun=lambda p: 5 - ((p[0] + 2)**2 + (p[1] + 1)**2)),
]

# Minimize, starting the search from the origin
result = minimize(func, x0=[0, 0], constraints=constraints)
print("Optimized variables:", result.x)


## Part B
Evaluate the line integral of the function $f(x, y) = x^2 + y^4$ along the circle $ x^2 + y^2 = 3 $ anticlockwise (scalar integral, not vector). You must use scipy for integration but you may use parameterization

In [None]:
from scipy.integrate import quad
import numpy as np

# Parameterization of the circle x^2 + y^2 = 3: r(t) = (sqrt(3) cos t, sqrt(3) sin t)
def x(t): return np.sqrt(3) * np.cos(t)
def y(t): return np.sqrt(3) * np.sin(t)

# Integrand of the scalar line integral: f(x(t), y(t)) * |r'(t)|
def integrand(t):
    # ds = |r'(t)| dt, and for this parameterization |r'(t)| = sqrt(3) (the radius).
    # Without this factor the result would be the integral over t, not over arc length.
    speed = np.sqrt(3)
    return (x(t)**2 + y(t)**4) * speed

# Integration over t in [0, 2*pi]; quad returns (value, error estimate), only the value is kept
result = quad(integrand, 0, 2 * np.pi)[0]
print("Line integral:", result)


# **Numpy**

### Read Lisan_Al_Gaib.pdf for problem description and complete the following functions

In [None]:
import time # to time the execution
import numpy as np
import matplotlib.pyplot as plt

In [None]:
### TODO 1
### Load data from data_path
### Check the input file spice_locations.txt to understand the Data Format
### Return : np array of size Nx2
def load_data(data_path):
    """Load comma-separated numeric rows from data_path as a 2-D array.

    ndmin=2 keeps the result two-dimensional even when the file holds a
    single row; without it np.loadtxt would return a 1-D array in that case
    and row/column indexing such as data[:, 0] would fail.

    Args:
        data_path: path to the text file, one comma-separated point per line.

    Returns:
        np.ndarray with one row per line of the file.
    """
    return np.loadtxt(data_path, delimiter=',', ndmin=2)

# Example Usage:
# data = load_data('spice_locations.txt')


In [None]:
### TODO 2.1
### If init_centers is None, initialize the centers by selecting K data points at random without replacement
### Else, use the centers provided in init_centers
### Return : np array of size Kx2
def initialise_centers(data, K, init_centers=None):
    """Return the starting centers for K-means.

    When init_centers is given it is returned unchanged. Otherwise K distinct
    row indices of data are drawn with np.random.choice (no replacement) and
    the corresponding rows are returned.
    """
    if init_centers is not None:
        return init_centers
    picked_rows = np.random.choice(data.shape[0], K, replace=False)
    return data[picked_rows]

In [None]:
### TODO 2.2
### Initialize the labels to all ones to size (N,) where N is the number of data points
### Return : np array of size N
def initialise_labels(data):
    """Return the initial label vector: an array of ones with one entry per data point.

    Integer dtype is used so the initial labels have the same kind of values
    as the cluster indices produced by np.argmin in later iterations.

    Args:
        data: the data points, one per row.

    Returns:
        np.ndarray of shape (N,), all ones, where N = len(data).
    """
    return np.ones(len(data), dtype=int)

In [None]:
### TODO 3.1 : E step
### For Each data point, find the distance to each center
### Return : np array of size NxK
def calculate_distances(data, centers):
    """Return the Euclidean distance from every data point to every center.

    A length-1 axis is inserted into data so that subtracting centers
    broadcasts to one difference vector per (point, center) pair; the norm is
    then taken over the last (coordinate) axis.
    """
    offsets = data[:, np.newaxis] - centers
    return np.linalg.norm(offsets, axis=2)

In [None]:
### TODO 3.2 : E step
### For Each data point, assign the label of the nearest center
### Return : np array of size N
def update_labels(distances):
    """Return, for each row of distances, the column index of its smallest value.

    With distances laid out as one row per data point and one column per
    center, this is the index of the nearest center (the first one on ties).
    """
    nearest = np.argmin(distances, axis=1)
    return nearest

In [None]:
### TODO 4 : M step
### Update the centers to the mean of the data points assigned to it
### Return : np array of size Kx2
def update_centers(data, labels, K):
    """Return the K new centers: the mean of the data points assigned to each label.

    A cluster that received no points has no mean (it would come out as NaN
    and then turn every later distance to that center into NaN). Such a
    cluster is instead re-seeded with one randomly chosen data point.

    Args:
        data: array of data points, one per row.
        labels: array with one cluster index per data point.
        K: number of clusters.

    Returns:
        np.ndarray with one center per row, K rows.
    """
    centers = []
    for k in range(K):
        members = data[labels == k]
        if len(members) == 0 and len(data) > 0:
            # Empty cluster: fall back to a single random data point.
            members = data[[np.random.randint(len(data))]]
        centers.append(members.mean(axis=0))
    return np.array(centers)



In [None]:
### TODO 6 : Check convergence
### Check if the labels have changed from the previous iteration
### Return : True / False
def check_termination(labels1, labels2):
    """Return True when the two label arrays have the same shape and the same values."""
    labels_unchanged = np.array_equal(labels1, labels2)
    return labels_unchanged

In [None]:
### DON'T CHANGE ANYTHING IN THE FOLLOWING FUNCTION
def kmeans(data_path:str, K:int, init_centers):
    '''
    Run K-means on the points stored in data_path until the labels stop changing.

    Input :
        data_path (type str): path to the file containing the data
        K (type int): number of clusters
        init_centers (type numpy.ndarray): initial centers. shape = (K, 2) or None
    Output :
        centers (type numpy.ndarray): final centers. shape = (K, 2)
        labels (type numpy.ndarray): label of each data point. shape = (N,)
        time (type float): time taken by the algorithm to converge in seconds
    N is the number of data points each of shape (2,)
    '''
    data = load_data(data_path)    
    centers = initialise_centers(data, K, init_centers)
    labels = initialise_labels(data)

    # The timer starts after loading and initialisation, so only the loop below is timed.
    start_time = time.time() # Time stamp 

    while True:
        # E step: distance of every point to every center, then nearest-center labels
        distances = calculate_distances(data, centers)
        labels_new = update_labels(distances)
        # M step: centers are recomputed before the convergence check
        centers = update_centers(data, labels_new, K)
        # Stop once the new labels match the previous iteration's labels
        if check_termination(labels, labels_new): break
        else: labels = labels_new
 
    end_time = time.time() # Time stamp after the algorithm ends
    return centers, labels, end_time - start_time 

In [None]:
### TODO 7
def visualise(data_path, labels, centers):
    """Plot the clustered points and their centers, save the figure, and return plt.

    The points are reloaded from data_path. Column 0 is drawn on the x axis
    (labelled 'Longitude') and column 1 on the y axis (labelled 'Latitude'),
    coloured by labels; centers are overlaid as larger translucent black
    markers. The figure is written to 'kmeans.png' in the working directory.
    """
    points = load_data(data_path)
    x_values, y_values = points[:, 0], points[:, 1]

    # Data points coloured by cluster, then the centers on top
    plt.scatter(x_values, y_values, c=labels, s=50, cmap='viridis')
    plt.scatter(centers[:, 0], centers[:, 1], c='black', s=200, alpha=0.5)

    # Title, axis labels, then save (saving last so the labels are in the file)
    plt.title("K-means clustering")
    plt.xlabel("Longitude")
    plt.ylabel("Latitude")
    plt.savefig("kmeans.png")

    ## DO NOT CHANGE THE FOLLOWING LINE
    return plt

In [None]:
### After you have completed the above functions, run the following code to generate the plot
data_path = 'spice_locations.txt'
K = 2
init_centers = None  # None: the starting centers are K randomly chosen data points
centers, labels, time_taken = kmeans(data_path, K, init_centers)
print('Time taken for the algorithm to converge:', time_taken)
visualise(data_path, labels, centers)