# **Basic Python**

## 1.a 
Write a function F that takes a single argument n. The function should print a sequence of n strings, as shown in the examples below: <br>
Ex: **n = 3**

--A--<br>
-BAB-<br>
CBABC<br>

**n = 4**

---A---<br>
--BAB--<br>
-CBABC-<br>
DCBABCD

In [33]:
def F(n):
    """Print an n-row letter pyramid, each row padded with '-' to width 2n-1.

    Row r (counting from 0) shows the first r+1 capital letters in reverse
    order followed by the letters B.. up to the r-th letter, so the row is a
    palindrome centred on 'A'.
    """
    alphabet = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    for row in range(n):
        padding = '-' * (n - row - 1)
        descending = alphabet[row::-1]      # e.g. 'CBA' for row 2
        ascending = alphabet[1:row + 1]     # e.g. 'BC' for row 2
        print(padding + descending + ascending + padding)
    

# Tests
# Print the pattern for two sample sizes.
for size in (10, 6):
    F(size)

## 1.b
Make a function F that takes only one argument, a dictionary(dict) d.
The keys of d are integers and the values of d are a tuple of type (x (int), y (int)).
You must print out the dict in the format "-key-, -x-, -y-" with each entry in a new line. Print it for each of the three sorted orders, by key values ascending, by x values descending, by y values ascending.

In [34]:
def F(d : dict):
    """Print the entries of d as "key, x, y" lines in three successive orders.

    d maps a key to a pair (x, y). The rows are printed sorted by key
    ascending, then by x descending, then by y ascending. Each sort is stable
    and starts from the previous order, so ties keep the preceding ordering.
    An 80-dash line follows every listing, and one extra closes the output.
    """
    rows = [(key, x, y) for key, (x, y) in d.items()]
    divider = '-' * 80
    # (tuple position to sort on, whether to sort descending)
    passes = ((0, False), (1, True), (2, False))
    for position, descending in passes:
        rows = sorted(rows, key=lambda row: row[position], reverse=descending)
        for row in rows:
            print(*row, sep=', ')
        print(divider)
    print(divider)

# Tests
# Print the three orderings for two sample dictionaries.
for sample in (
    {1 : (1, 2), 2 : (-1, 4), 5 : (-4, 3), 4 : (2, 3)},
    {-8 : (4, 2), 6 : (-3, 4), 7 : (2, 1), 5 : (9, -10)},
):
    F(sample)

## 2 Working with Student Records

Use the data in **student_records.csv** to complete the given tasks. Do not include any external libraries. Use a Python dictionary if required.

### Reference
- [Python Dictionaries](https://www.w3schools.com/python/python_dictionaries.asp)


#### 2.a: open the student_records.csv file and print out the first 10 rows

In [35]:
import csv

# Print the first ten data rows of the CSV; the heading line is skipped.
with open("student_records.csv", newline='') as f:
    rows = csv.reader(f)
    next(rows)  # headings line
    for index, record in enumerate(rows):
        if index == 10:
            break
        print(', '.join(record))

#### 2.b: Print out the total credits and calculate the CPI of each student.
CPI is the credit-weighted average of the grades in core courses and electives (letter grades are converted to numbers: AP, AA = 10, AB = 9, BB = 8, BC = 7, CC = 6).

In [36]:
import csv

# Letter grade -> grade point.
scores = dict(AP=10, AA=10, AB=9, BB=8, BC=7, CC=6)

# roll number -> [sum of credit * grade point, sum of credits]
students = {}

# NOTE(review): every row is counted regardless of its course type; confirm
# whether minor/honours rows should be excluded from the CPI.
with open("student_records.csv", newline='') as f:
    rows = csv.reader(f)
    next(rows)  # headings line
    for record in rows:
        roll_number, _, credit, _, _, grade = record
        roll_number = int(roll_number)
        credit = int(credit)
        grade_point = scores[grade]
        totals = students.setdefault(roll_number, [0, 0])
        totals[0] += credit * grade_point
        totals[1] += credit

# Students are printed in first-seen order; a student whose credits sum to 0
# would raise ZeroDivisionError here.
for roll_number, totals in students.items():
    total_cg, total_c = totals
    print(f"{roll_number}: CPI={total_cg/total_c:.2f}, Total Credits={total_c}")

#### 2.c: Print out the names of all students who meet the graduation requirements 
At least 20 credits of core courses, 15 credits of department electives, 10 credits of flexible electives and 5 credits of hasmed electives

In [37]:
import csv

# Position of each tracked course type inside a student's credit tally, and
# the minimum credits required at that same position.
indices_of_type = dict(core=0, department_elective=1, flexible_elective=2, hasmed_elective=3)
minimum_required = (20, 15, 10, 5)

# roll number -> credits earned per tracked course type
students = {}

with open("student_records.csv", newline='') as f:
    rows = csv.reader(f)
    next(rows)  # headings line
    for roll_number, _, credit, _, course_type, _ in rows:
        if course_type in indices_of_type:
            roll_number = int(roll_number)
            credit = int(credit)
            tally = students.setdefault(roll_number, [0] * len(indices_of_type))
            tally[indices_of_type[course_type]] += credit

# A student graduates when no tracked category falls short of its minimum.
for roll_number, earned in students.items():
    if not any(have < need for have, need in zip(earned, minimum_required)):
        print(roll_number)  # roll number stands in for the name (no name given)

#### 2.d: Print out the names of all students who completed a minor 
At least 10 credits with the minor tag in a specific department

In [38]:
import csv

# roll number -> {department: minor credits earned in that department}
students = {}

with open("student_records.csv", newline='') as f:
    rows = csv.reader(f)
    next(rows)  # headings line
    for roll_number, _, credit, course_name, course_type, _ in rows:
        if course_type == 'minor':
            roll_number = int(roll_number)
            credit = int(credit)
            # The department is the course name with its trailing digits removed.
            department = course_name.rstrip('1234567890')
            per_department = students.setdefault(roll_number, {})
            per_department[department] = per_department.get(department, 0) + credit

# A minor is completed in every department with at least 10 minor credits.
for roll_number, per_department in students.items():
    completed = [dept for dept, total in per_department.items() if total >= 10]
    if completed:
        # roll number stands in for the name (no name given); the departments are listed too
        print(f'{roll_number} got minor in:', ', '.join(completed))

#### 2.e: Print out the names of all students who completed an honours
At least 10 credits with the honours tag and 20 core credits

In [39]:
import csv

# Position of each relevant course type inside a student's credit tally, and
# the minimum credits required at that same position.
indices_of_type = dict(core=0, honours=1)
minimum_required = (20, 10)

# roll number -> [core credits, honours credits]
students = {}

with open("student_records.csv", newline='') as f:
    rows = csv.reader(f)
    next(rows)  # headings line
    for roll_number, _, credit, _, course_type, _ in rows:
        if course_type in indices_of_type:
            roll_number = int(roll_number)
            credit = int(credit)
            tally = students.setdefault(roll_number, [0] * len(indices_of_type))
            tally[indices_of_type[course_type]] += credit

# Honours needs both minimums met.
for roll_number, earned in students.items():
    if not any(have < need for have, need in zip(earned, minimum_required)):
        print(roll_number)  # roll number stands in for the name (no name given)

# **SciPy** 

## Part A
Minimize the function $f(x, y) = 2(x - y - 3)^2 + 4(x + 2y + 1)^4$.<br>
With the constraints : $ x - y \ge -3, (x + 2)^2 + (y + 1)^2 \le 5$ <br>
Using scipy.optimize.minimize with constraints. (Hint: Look at the examples in the official documentation)

In [68]:
from scipy.optimize import minimize
import numpy as np

def f(x0):
    """Objective 2(x - y - 3)^2 + 4(x + 2y + 1)^4 evaluated at the point x0 = (x, y)."""
    x, y = x0
    quadratic_term = (x - y - 3) ** 2
    quartic_term = (x + 2 * y + 1) ** 4
    return 2 * quadratic_term + 4 * quartic_term
    
# scipy 'ineq' constraints require fun(x) >= 0:
#   x - y >= -3               ->  x - y + 3 >= 0
#   (x+2)^2 + (y+1)^2 <= 5    ->  5 - (x+2)^2 - (y+1)^2 >= 0
constraints = [
    {'type': 'ineq', 'fun': lambda p: p[0] - p[1] + 3},
    {'type': 'ineq', 'fun': lambda p: 5 - (p[0] + 2)**2 - (p[1] + 1)**2},
]
start = np.array([0, 0])
solution = minimize(f, start, constraints=constraints)
print(solution)

## Part B
Evaluate the line integral of the function $f(x, y) = x^2 + y^4$ along the circle $ x^2 + y^2 = 3 $ anticlockwise (scalar integral, not vector). You must use scipy for integration but you may use parameterization

In [69]:
from scipy.integrate import quad
from math import sin, cos, pi

# Scalar line integral of f(x, y) = x^2 + y^4 over the circle x^2 + y^2 = 3.
# Parameterization: x = r cos t, y = r sin t with r = sqrt(3), t in [0, 2*pi],
# so f = 3 cos^2 t + 9 sin^4 t and the arc-length element is ds = r dt.
# The factor r must multiply the integrand; without it the result is too
# small by sqrt(3).
radius = 3 ** 0.5
line_integral = quad(lambda t: (3 * cos(t)**2 + 9 * sin(t)**4) * radius, 0, 2*pi)
print(line_integral)  # (value, estimated absolute error)

# **Numpy**

### Read Lisan_Al_Gaib.pdf for problem description and complete the following functions

In [42]:
import time # to time the execution
import numpy as np
import matplotlib.pyplot as plt

In [43]:
### TODO 1
### Load data from data_path
### Check the input file spice_locations.txt to understand the Data Format
### Return : np array of size Nx2
import csv
def load_data(data_path):
    """Read comma-separated "x,y" rows from data_path into a float array.

    Blank lines are skipped. The result always has shape (N, 2), including
    (0, 2) when the file holds no data rows.

    Raises ValueError if a non-blank row does not have exactly two fields or
    a field is not a number.
    """
    points = []
    with open(data_path, newline='') as f:
        for row in csv.reader(f):
            # csv yields [] for an empty line; also ignore whitespace-only lines.
            if not any(field.strip() for field in row):
                continue
            x, y = row
            points.append((float(x), float(y)))
    # reshape keeps the Nx2 contract even when no points were read.
    return np.array(points, dtype=float).reshape(-1, 2)


In [44]:
### TODO 2.1
### If init_centers is None, initialize the centers by selecting K data points at random without replacement
### Else, use the centers provided in init_centers
### Return : np array of size Kx2
def initialise_centers(data, K, init_centers=None):
    """Return the starting centers for K-means.

    When init_centers is given it is returned unchanged. Otherwise K distinct
    rows of data are drawn at random (without replacement) and returned.
    """
    if init_centers is not None:
        return init_centers
    chosen_rows = np.random.choice(data.shape[0], K, replace=False)
    return data[chosen_rows, :]

In [45]:
### TODO 2.2
### Initialize the labels to all ones to size (N,) where N is the number of data points
### Return : np array of size N
def initialise_labels(data: np.ndarray):
    """Return an array of ones with one entry per row of data (shape (N,)).

    The annotation is plain np.ndarray: a subscript such as np.ndarray[float]
    is evaluated when the function is defined, puts float in the shape slot
    rather than the dtype slot, and is rejected by some NumPy releases.
    """
    return np.ones(data.shape[0])


In [56]:
### TODO 3.1 : E step
### For Each data point, find the distance to each center
### Return : np array of size NxK
def calculate_distances(data, centers):
    """Return the Euclidean distance from every data point to every center.

    data has one point per row (N rows) and centers one center per row
    (K rows); entry [i, j] of the (N, K) result is the distance between
    point i and center j.
    """
    data = np.asarray(data, dtype=float)
    centers = np.asarray(centers, dtype=float)
    if data.size == 0:
        # No points: keep the (0, K) shape without indexing into empty data.
        return np.empty((0, centers.shape[0]))
    # Broadcast (N, 1, D) - (1, K, D) -> (N, K, D), then take the norm over D.
    offsets = data[:, np.newaxis, :] - centers[np.newaxis, :, :]
    return np.linalg.norm(offsets, axis=2)


In [47]:
### TODO 3.2 : E step
### For Each data point, assign the label of the nearest center
### Return : np array of size N
def update_labels(distances):
    """Return, for each row of the (N, K) distances array, the index of its smallest entry.

    The result has shape (N,) and an integer dtype (also when N is 0); on
    ties the lowest index wins.
    """
    return np.argmin(np.asarray(distances), axis=1)

In [61]:
### TODO 4 : M step
### Update the centers to the mean of the data points assigned to it
### Return : np array of size Kx2
def update_centers(data, labels, K):
    """Return the mean of the points assigned to each of the K clusters.

    data is (N, D) and labels is (N,) with values in 0..K-1; the result is
    (K, D), where row k is the mean of the rows of data whose label is k.
    A cluster with no points keeps a zero center instead of becoming NaN,
    because a NaN center would make every later distance to it NaN.
    Labels outside 0..K-1 match no cluster and are ignored.
    """
    data = np.asarray(data, dtype=float)
    labels = np.asarray(labels)
    centers = np.zeros((K, data.shape[1]))
    for cluster in range(K):
        members = data[labels == cluster]
        if members.shape[0]:
            # Averaging per cluster row: each center is divided by its own count.
            centers[cluster] = members.mean(axis=0)
    return centers


In [58]:
### TODO 6 : Check convergence
### Check if the labels have changed from the previous iteration
### Return : True / False
def check_termination(labels1, labels2):
    """Return True when the two label assignments are identical, else False.

    Accepts arrays or plain sequences; assignments of different shape are
    reported as not equal rather than raising.
    """
    return bool(np.array_equal(labels1, labels2))

In [50]:
### DON'T CHANGE ANYTHING IN THE FOLLOWING FUNCTION
def kmeans(data_path:str, K:int, init_centers):
    '''
    Run K-means on the points stored in a file until the labels stop changing.

    Input :
        data_path (type str): path to the file containing the data
        K (type int): number of clusters
        init_centers (type numpy.ndarray): initial centers. shape = (K, 2) or None
    Output :
        centers (type numpy.ndarray): final centers. shape = (K, 2)
        labels (type numpy.ndarray): label of each data point. shape = (N,)
        time (type float): time taken by the algorithm to converge in seconds
    N is the number of data points each of shape (2,)
    '''
    data = load_data(data_path)    
    centers = initialise_centers(data, K, init_centers)
    labels = initialise_labels(data)

    # Timing starts here, so loading and initialisation are not included.
    start_time = time.time() # Time stamp 

    # Alternate label assignment and center update until the labels repeat.
    while True:
        distances = calculate_distances(data, centers)
        labels_new = update_labels(distances)
        # Centers are recomputed before the check, so on exit they are the
        # means for labels_new, which at that point equals labels.
        centers = update_centers(data, labels_new, K)
        if check_termination(labels, labels_new): break
        else: labels = labels_new
 
    end_time = time.time() # Time stamp after the algorithm ends
    return centers, labels, end_time - start_time 

In [66]:
### TODO 7
def visualise(data_path, labels, centers):
    """Plot the clustered points and their centers and save the figure.

    The points are reloaded from data_path and coloured by labels; centers
    are drawn as larger translucent black markers. The labels and centers are
    also printed. The figure is written to 'kmeans.png'.

    Returns the matplotlib.pyplot module so the caller can show or extend
    the plot.
    """
    data = load_data(data_path)

    # Scatter plot of the data points, then the cluster centers on top
    plt.scatter(data[:, 0], data[:, 1], c=labels, s=50, cmap='viridis')
    plt.scatter(centers[:, 0], centers[:, 1], c='black', s=200, alpha=0.5)

    print(labels, centers)

    ### Set title as 'K-means clustering'
    plt.title("K-means clustering")
    ### Set xlabel as 'Longitude'
    plt.xlabel('Longitude')
    ### Set ylabel as 'Latitude'
    # ylabel, not xlabel: a second xlabel call would overwrite 'Longitude'
    # and leave the y axis unlabelled.
    plt.ylabel('Latitude')
    ### Save the plot as 'kmeans.png'
    plt.savefig('kmeans.png')

    ## DO NOT CHANGE THE FOLLOWING LINE
    return plt

In [67]:
### After you have completed the above functions, run the following code to generate the plot
data_path = 'spice_locations.txt'
K = 2
init_centers = None  # None: initialise_centers picks K random data points
centers, labels, time_taken = kmeans(data_path, K, init_centers)
print('Time taken for the algorithm to converge:', time_taken)
visualise(data_path, labels, centers)