# **Basic Python**

## 1.a 
Give a function F that takes a single argument n. This function should print a sequence of n strings as described in the examples below: <br>
Ex: **n = 3**

--A--<br>
-BAB-<br>
CBABC<br>

**n = 4**

---A---<br>
--BAB--<br>
-CBABC-<br>
DCBABCD

In [None]:
def F(n):
    """
    Print an n-row letter pyramid padded with dashes.

    Row i (counting from 0) holds the letters from chr(65 + i) down to 'A'
    and back up again, e.g. "CBABC" for i = 2. Every row is padded with "-"
    on both sides to the width of the last row, 2 * n - 1.

    Args:
        n (int): Number of rows in the pattern.
    """
    rows = []
    for depth in range(n):
        # 'A' up to the outermost letter of this row
        rising = [chr(65 + offset) for offset in range(depth + 1)]

        # Mirror around the single 'A': descending half, then the ascending
        # half without repeating the middle character
        body = "".join(rising[::-1] + rising[1:])

        # The row is 2 * depth + 1 wide, so the padding needed to reach
        # 2 * n - 1 splits evenly: n - 1 - depth dashes on each side
        dashes = "-" * (n - 1 - depth)
        rows.append(dashes + body + dashes)

    # Emit the finished rows one per line
    for text in rows:
        print(text)
# For any number of rows, the following code is valid:
#n=int(input("number of rows required?"))
#F(n)

# Tests
# Print the pattern for a 10-row and then a 6-row pyramid
for row_count in (10, 6):
    F(row_count)

## 1.b
Make a function F that takes only one argument, a dictionary(dict) d.
The keys of d are integers and the values of d are a tuple of type (x (int), y (int)).
You must print out the dict in the format "-key-, -x-, -y-" with each entry in a new line. Print it for each of the three sorted orders, by key values ascending, by x values descending, by y values ascending.

In [None]:
def F(d):
    """
    Print the entries of d three times, each time in a different order.

    Each entry is printed on its own line as "-key-, -x-, -y-", where
    (x, y) is the tuple stored under key. The three orders, each preceded
    by a heading, are:
    1. keys ascending,
    2. x values descending,
    3. y values ascending.

    Args:
        d (dict): Maps a key to a tuple (x, y).
    """
    # (heading, sort key over (key, (x, y)) items, descending?)
    views = (
        ("Sorted by keys (ascending):", lambda entry: entry[0], False),
        ("\nSorted by x values (descending):", lambda entry: entry[1][0], True),
        ("\nSorted by y values (ascending):", lambda entry: entry[1][1], False),
    )

    for heading, sort_key, descending in views:
        ordered = sorted(d.items(), key=sort_key, reverse=descending)
        print(heading)
        for key, pair in ordered:
            print(f"-{key}-, -{pair[0]}-, -{pair[1]}-")


# Tests
# Two sample dictionaries, printed one after the other
for sample in (
    {1: (1, 2), 2: (-1, 4), 5: (-4, 3), 4: (2, 3)},
    {-8: (4, 2), 6: (-3, 4), 7: (2, 1), 5: (9, -10)},
):
    F(sample)

## 2 Working with Student Records

Use the data in **student_records.csv** to complete the given tasks. Do not include any external libraries. Use a Python dictionary if required.

### Reference
- [Python Dictionaries](https://www.w3schools.com/python/python_dictionaries.asp)


#### 2.a: open the student_records.csv file and print out the first 10 rows

In [None]:
import csv

# Path to the CSV file
file_path = "C:\\Users\\vipul\\Downloads\\student_records.csv"


# Open and read the CSV file.
# newline="" is the mode the csv module documents for file objects: it lets
# the reader handle line endings itself, including ones inside quoted fields.
with open(file_path, "r", newline="") as file:
    reader = csv.reader(file)

    # The first line is the header; None signals a completely empty file
    header = next(reader, None)
    if header is None:
        print("The file is empty.")
    else:
        print("Header:", ", ".join(header))  # Print the header

        print("\nFirst 10 rows:")
        for i, row in enumerate(reader):
            if i >= 10:  # Stop once 10 data rows have been printed
                break
            print(", ".join(row))


#### 2.b: Print out the total credits and calculate the CPI of each student.
CPI is the weighted average over core courses and electives (the weights being the letter grades converted to numbers: AP, AA = 10, AB = 9, BB = 8, BC = 7, CC = 6).

In [None]:
import csv

# Grade-to-Weight mapping
grade_mapping = {"AP": 10, "AA": 10, "AB": 9, "BB": 8, "BC": 7, "CC": 6}

# Course types that count towards the CPI. The task defines CPI over core
# courses and electives only, so rows of any other type add to the total
# credits but not to the CPI.
# NOTE(review): assumes these are the exact labels used in the course_type
# column - confirm against the CSV.
cpi_course_types = {
    "core",
    "department_elective",
    "flexible_elective",
    "hasmed_elective",
}

# Path to the CSV file
file_path = "C:\\Users\\vipul\\Downloads\\student_records.csv"


# roll_number -> running totals for that student:
#   total_credits: credits over all rows
#   cpi_credits:   credits over CPI-eligible rows only
#   weighted_sum:  sum of grade points * credits over CPI-eligible rows
cpi_results = {}

# Read the CSV file.
# newline="" is the mode the csv module documents for file objects: it lets
# the reader handle line endings itself, including ones inside quoted fields.
with open(file_path, "r", newline="") as file:
    reader = csv.DictReader(file)  # Use DictReader to access columns by name

    # Process each row
    for row in reader:
        roll_number = row["roll_number"]
        credit = int(row["credit"])

        # Fetch the student's record, creating it on first sight
        record = cpi_results.setdefault(
            roll_number,
            {"total_credits": 0, "cpi_credits": 0, "weighted_sum": 0},
        )

        record["total_credits"] += credit

        if row["course_type"] in cpi_course_types:
            # A grade missing from grade_mapping raises KeyError here
            record["cpi_credits"] += credit
            record["weighted_sum"] += grade_mapping[row["grade"]] * credit

# Calculate and print the results
print("Roll Number | Total Credits | CPI")
for roll_number, data in cpi_results.items():
    total_credits = data["total_credits"]
    cpi_credits = data["cpi_credits"]
    weighted_sum = data["weighted_sum"]
    # Guard against students with no CPI-eligible credits
    cpi = weighted_sum / cpi_credits if cpi_credits > 0 else 0
    print(f"{roll_number} | {total_credits} | {cpi:.2f}")


#### 2.c: Print out the roll numbers of all students who meet the graduation requirements
At least 20 credits of core courses, 15 credits of department electives, 10 credits of flexible electives and 5 credits of hasmed electives

In [None]:
import csv


file_path = "C:\\Users\\vipul\\Downloads\\student_records.csv"

# Minimum credits per course type needed to graduate
required_credits = {
    "core": 20,
    "department_elective": 15,
    "flexible_elective": 10,
    "hasmed_elective": 5,
}

# roll_number -> {course type -> credits earned}, tracking only the course
# types listed in required_credits
student_credits = {}

# Read the CSV file and aggregate credits for each roll number by course type.
# newline="" is the mode the csv module documents for file objects: it lets
# the reader handle line endings itself, including ones inside quoted fields.
with open(file_path, "r", newline="") as file:
    reader = csv.DictReader(file)  # Use DictReader to access columns by name
    for row in reader:
        roll_number = row["roll_number"]
        course_type = row["course_type"]
        credit = int(row["credit"])

        # Fetch the student's tally, creating a zeroed one on first sight.
        # Its keys come from required_credits so the two cannot drift apart.
        earned = student_credits.setdefault(
            roll_number, dict.fromkeys(required_credits, 0)
        )

        # Course types without a graduation requirement are ignored
        if course_type in earned:
            earned[course_type] += credit

# A student is eligible when every requirement is met
eligible_students = [
    roll_number
    for roll_number, earned in student_credits.items()
    if all(
        earned[course_type] >= minimum
        for course_type, minimum in required_credits.items()
    )
]


print("Roll numbers of students meeting graduation requirements:")
print(eligible_students)


#### 2.d: Print out the roll numbers of all students who completed a minor
At least 10 credits with the minor tag in a specific department

In [None]:
import csv

file_path = "C:\\Users\\vipul\\Downloads\\student_records.csv"

# Minimum minor-tagged credits needed within a single department
minor_credit_threshold = 10

# roll_number -> {department -> minor credits earned in that department}
student_minors = {}

# Read the CSV file and aggregate credits for minor courses.
# newline="" is the mode the csv module documents for file objects: it lets
# the reader handle line endings itself, including ones inside quoted fields.
with open(file_path, "r", newline="") as file:
    reader = csv.DictReader(file)  # Use DictReader to access columns by name
    for row in reader:
        roll_number = row["roll_number"]
        department = row["department"]
        course_type = row["course_type"]
        credit = int(row["credit"])

        # Only minor-tagged courses count here
        if course_type != "minor":
            continue

        per_department = student_minors.setdefault(roll_number, {})
        per_department[department] = per_department.get(department, 0) + credit

# A student needs to reach the threshold in only one department
eligible_students = [
    roll_number
    for roll_number, per_department in student_minors.items()
    if any(
        earned >= minor_credit_threshold
        for earned in per_department.values()
    )
]

print("Roll numbers of students who completed a minor:")
print(eligible_students)


#### 2.e: Print out the roll numbers of all students who completed an honours
At least 10 credits with the honours tag and 20 core credits

In [None]:
import csv


file_path = "C:\\Users\\vipul\\Downloads\\student_records.csv"

# Minimum credits per course type needed to complete an honours
honours_requirements = {"honours": 10, "core": 20}

# roll_number -> {"honours": credits, "core": credits}
student_credits = {}

# Read the CSV file and aggregate credits for honours and core courses.
# newline="" is the mode the csv module documents for file objects: it lets
# the reader handle line endings itself, including ones inside quoted fields.
with open(file_path, "r", newline="") as file:
    reader = csv.DictReader(file)  # Use DictReader to access columns by name
    for row in reader:
        roll_number = row["roll_number"]
        course_type = row["course_type"]
        credit = int(row["credit"])

        # Fetch the student's tally, creating a zeroed one on first sight
        earned = student_credits.setdefault(
            roll_number, dict.fromkeys(honours_requirements, 0)
        )

        # Rows of any other course type are ignored
        if course_type in earned:
            earned[course_type] += credit

# A student qualifies only when both minimums are met
eligible_students = [
    roll_number
    for roll_number, earned in student_credits.items()
    if all(
        earned[course_type] >= minimum
        for course_type, minimum in honours_requirements.items()
    )
]


print("Roll numbers of students who completed an honours:")
print(eligible_students)


# **SciPy** 

## Part A
Minimize the function $f(x, y) = 2(x - y - 3)^2 + 4(x + 2y + 1)^4$.<br>
With the constraints : $ x - y \ge -3, (x + 2)^2 + (y + 1)^2 \le 5$ <br>
Using scipy.optimize.minimize with constraints. (Hint: Look at the examples in the official documentation)

In [None]:
from scipy.optimize import minimize

# Define the objective function
def objective_function(x):
    """
    Evaluate f(x, y) = 2(x - y - 3)^2 + 4(x + 2y + 1)^4.

    Args:
        x: Indexable pair holding the point, x[0] = x and x[1] = y.
    Returns:
        The value of f at that point.
    """
    # The two inner expressions that get squared / raised to the fourth power
    squared_base = x[0] - x[1] - 3
    quartic_base = x[0] + 2 * x[1] + 1
    return 2 * squared_base**2 + 4 * quartic_base**4

# Constraint functions; each 'ineq' entry is satisfied when its function is
# non-negative
def _line_constraint(point):
    # x - y >= -3  rewritten as  x - y + 3 >= 0
    return point[0] - point[1] + 3


def _disc_constraint(point):
    # (x + 2)^2 + (y + 1)^2 <= 5  rewritten as  5 - (...) >= 0
    return 5 - ((point[0] + 2)**2 + (point[1] + 1)**2)


constraints = [
    {'type': 'ineq', 'fun': _line_constraint},
    {'type': 'ineq', 'fun': _disc_constraint},
]

# Starting point (x, y) handed to the solver
x0 = [0, 0]

# Run the constrained minimization with the SLSQP method
result = minimize(objective_function, x0, method='SLSQP', constraints=constraints)

# Report the outcome
if not result.success:
    print("Optimization failed!")
    print(f"Reason: {result.message}")
else:
    print("Optimization was successful!")
    print(f"Optimal value of x and y: {result.x}")
    print(f"Minimum value of the objective function: {result.fun}")


## Part B
Evaluate the line integral of the function $f(x, y) = x^2 + y^4$ along the circle $ x^2 + y^2 = 3 $ anticlockwise (scalar integral, not vector). You must use scipy for integration but you may use parameterization

In [None]:
import numpy as np
from scipy.integrate import quad

# Define the function f(x, y) along the parameterized circle
def integrand(t):
    """
    Return f(x(t), y(t)) * ds/dt on the circle x^2 + y^2 = 3.

    The circle is parameterized as x = sqrt(3) cos t, y = sqrt(3) sin t, and
    f(x, y) = x^2 + y^4. The arc-length factor ds/dt equals the radius.
    """
    radius = np.sqrt(3)
    x_on_circle = radius * np.cos(t)
    y_on_circle = radius * np.sin(t)
    # f evaluated on the curve, scaled by the arc-length element
    return (x_on_circle**2 + y_on_circle**4) * radius

# One full anticlockwise turn of the circle: t runs from 0 to 2π
lower_limit, upper_limit = 0, 2 * np.pi
result, error = quad(integrand, lower_limit, upper_limit)


print(f"The value of the line integral is: {result:.4f}")


# **Numpy**

### Read Lisan_Al_Gaib.pdf for problem description and complete the following functions

In [None]:
import time # to time the execution
import numpy as np
import matplotlib.pyplot as plt

In [None]:
### TODO 1
def load_data(data_path):
    """
    Load comma-separated numeric data from a text file.

    Args:
        data_path (str): Path to the file containing the data points.
    Returns:
        numpy.ndarray or None: The data as a 2D array with one row per line
        of the file, or None if the file could not be read or parsed (an
        error message is printed in that case).
    """
    try:
        # ndmin=2 keeps the result two-dimensional even when the file holds
        # a single line, which would otherwise come back as a 1-D array
        return np.loadtxt(data_path, delimiter=",", ndmin=2)
    except (OSError, ValueError) as e:
        # OSError: file missing or unreadable; ValueError: malformed content
        print(f"Error loading data from {data_path}: {e}")
        return None


In [None]:
### TODO 2.1
def initialise_centers(data, K, init_centers=None):
    """
    Choose the starting cluster centers.

    Args:
        data (numpy.ndarray): Dataset with shape (N, 2).
        K (int): Number of clusters.
        init_centers (array-like or None): Initial centers, if provided.
    Returns:
        numpy.ndarray: Initial cluster centers. When init_centers is None
        these are K distinct rows of data picked at random; otherwise they
        are init_centers as a NumPy array.
    """
    if init_centers is None:
        # replace=False picks K different row indices
        chosen_rows = np.random.choice(data.shape[0], size=K, replace=False)
        return data[chosen_rows]
    # Accept lists/tuples as well; an ndarray is passed through as-is
    return np.asarray(init_centers)


In [None]:
### TODO 2.2
def initialise_labels(data):
    """
    Create the starting label array, one label per data point.

    Args:
        data (numpy.ndarray): Dataset with shape (N, 2).
    Returns:
        numpy.ndarray: Integer array of shape (N,) in which every point
        starts with the label 1.
    """
    point_count = data.shape[0]
    return np.full(point_count, 1, dtype=int)


In [None]:
### TODO 3.1 : E step
### For Each data point, find the distance to each center
### Return : np array of size NxK
def calculate_distances(data, centers):
    """
    Calculate the Euclidean distance of each data point from each cluster center.

    Args:
        data (numpy.ndarray): Dataset with shape (N, 2).
        centers (numpy.ndarray): Cluster centers with shape (K, 2).
    Returns:
        numpy.ndarray: Distances of each point to each center with shape (N, K);
        entry [i, k] is the distance from point i to center k.
    """
    # Broadcasting: (N, 1, 2) - (1, K, 2) gives every point-center difference
    # as an (N, K, 2) array; the norm over the last axis collapses it to (N, K)
    return np.linalg.norm(data[:, None] - centers[None, :], axis=2)


In [None]:
### TODO 3.2
def update_labels(distances):
    """
    Label every data point with the index of its closest cluster center.

    Args:
        distances (numpy.ndarray): Point-to-center distances with shape (N, K).
    Returns:
        numpy.ndarray: Shape (N,); entry i is the column index of the
        smallest distance in row i.
    """
    nearest_center = np.argmin(distances, axis=1)
    return nearest_center


In [None]:
### TODO 4 : M step
### Update the centers to the mean of the data points assigned to it
### Return : np array of size Kx2
def update_centers(data, labels, K):
    """
    Recompute each cluster center as the mean of the points labelled with it.

    Args:
        data (numpy.ndarray): Dataset with shape (N, 2).
        labels (numpy.ndarray): Cluster labels with shape (N,).
        K (int): Number of clusters.
    Returns:
        numpy.ndarray: Updated cluster centers with shape (K, 2). A cluster
        with no assigned points gets a row of NaN, since it is the mean of
        an empty selection.
    """
    new_centers = []
    for cluster_id in range(K):
        # Boolean mask selects the rows currently assigned to this cluster
        members = data[labels == cluster_id]
        new_centers.append(members.mean(axis=0))
    return np.array(new_centers)



In [None]:
### TODO 6 : Check convergence
### Check if the labels have changed from the previous iteration
### Return : True / False
def check_termination(labels1, labels2):
    """
    Tell whether two label arrays are identical.

    Args:
        labels1 (numpy.ndarray): Labels from the previous iteration.
        labels2 (numpy.ndarray): Labels from the current iteration.
    Returns:
        bool: True if both arrays have the same shape and the same
        elements, otherwise False.
    """
    labels_unchanged = np.array_equal(labels1, labels2)
    return labels_unchanged


In [None]:
### DON'T CHANGE ANYTHING IN THE FOLLOWING FUNCTION
def kmeans(data_path:str, K:int, init_centers):
    '''
    Run K-means on the data stored at data_path until the labels stop changing.

    Input :
        data_path (type str): path to the file containing the data
        K (type int): number of clusters
        init_centers (type numpy.ndarray): initial centers. shape = (K, 2) or None
    Output :
        centers (type numpy.ndarray): final centers. shape = (K, 2)
        labels (type numpy.ndarray): label of each data point. shape = (N,)
        time (type float): time taken by the algorithm to converge in seconds
    N is the number of data points each of shape (2,)
    '''
    data = load_data(data_path)    
    centers = initialise_centers(data, K, init_centers)
    # Starting labels; they only serve as the "previous" labels for the
    # first termination check
    labels = initialise_labels(data)

    # Only the iteration loop below is timed; loading and initialisation
    # happen before this time stamp
    start_time = time.time() # Time stamp 

    while True:
        # E step: assign every point to its nearest current center
        distances = calculate_distances(data, centers)
        labels_new = update_labels(distances)
        # M step: runs before the termination check, so the returned centers
        # are always computed from the latest labels
        centers = update_centers(data, labels_new, K)
        # Stop once an iteration leaves the labels unchanged; at that point
        # labels and labels_new are equal
        if check_termination(labels, labels_new): break
        else: labels = labels_new
 
    end_time = time.time() # Time stamp after the algorithm ends
    return centers, labels, end_time - start_time 

In [None]:
### TODO 7
def visualise(data_path, labels, centers):
    """
    Plot the clustered points with their centers and save the figure.

    The points are reloaded from data_path and coloured by label; the
    centers are drawn on top as larger black markers. The plot is written
    to 'kmeans.png' and then shown.

    Args:
        data_path (str): Path to the data file.
        labels (numpy.ndarray): Cluster labels.
        centers (numpy.ndarray): Cluster centers.
    """
    points = load_data(data_path)

    # Data points: first column on the x-axis, second on the y-axis
    point_xs, point_ys = points[:, 0], points[:, 1]
    plt.scatter(point_xs, point_ys, c=labels, cmap='viridis', s=50)

    # Cluster centers, drawn larger and semi-transparent
    center_xs, center_ys = centers[:, 0], centers[:, 1]
    plt.scatter(center_xs, center_ys, c='black', s=200, alpha=0.7)

    plt.title('K-means clustering')
    plt.xlabel('Longitude')
    plt.ylabel('Latitude')

    # Save before show()
    plt.savefig('kmeans.png')
    plt.show()


In [None]:
### After you have completed the above functions, run the following code to generate the plot
data_path = 'C:\\Users\\vipul\\Downloads\\spice_locations.txt'

# Two clusters; None makes kmeans pick the starting centers itself
K = 2
init_centers = None

centers, labels, time_taken = kmeans(data_path, K, init_centers)
print('Time taken for the algorithm to converge:', time_taken)

visualise(data_path, labels, centers)
