In [1]:
import numpy as np
import matplotlib.pyplot as plt
import math
import pandas as pd
import time

#Question 1
How many multiplications and additions do you need to perform a matrix multiplication between a (n, k) and (k, m) matrix? Explain.

#Solution
 **Number of Multiplications:** We need to perform k multiplications per element in order to compute each element of the resultant n x m matrix. As there are n×m elements in the resulting matrix, the total number of multiplications required is [n × m × k].

**Number of additions:** After performing the multiplications, we need to add (k - 1) products to obtain each element. Hence, (k − 1) additions are required. Thus, the total number of additions would be [n × m × (k−1)].


#Question 2
Write Python code to multiply the above two matrices. Solve using list of lists and then use numpy. Compare the timing of both solutions. Which one is faster? Why?

In [51]:
# Multiplying the matrices using nested lists
startTime = time.time()
def nestedList_matrix_multiplication(lst1, lst2):
    n = len(lst1)
    m = len(lst2[0])
    k1 = len(lst1[0])

    #Initialising matrix using list comprehension
    res = [[0 for i in range(m)] for i in range(n)]

    for i in range(n):
        for j in range(m):
            for k in range(k1):
                res[i][j] = res[i][j] + (lst1[i][k]*lst2[k][j])
    return res


lst1 = [[2,3,4],[5,6,7]]
lst2 = [[2,3],[4,5],[6,7]]

print(nestedList_matrix_multiplication(lst1, lst2))
endTime = time.time()
print('Time taken:', endTime - startTime)

[[40, 49], [76, 94]]
Time taken: 0.0022809505462646484


In [53]:
# Multiplying the matrices using Numpy module
startTime = time.time()
def numpy_matrix_multiplication(arr1, arr2):
    n = len(arr1)
    m = len(arr2[0])
    #Initialising the matrix
    res = np.zeros((n,m), dtype = int)
    arr2 = arr2.T
    for i in range(n):
        for j in range(m):
            res[i][j] = np.sum(arr1[i]*arr2[j])
    return res


arr1 = np.array([[2,3,4],[5,6,7]])
arr2 = np.array([[2,3],[4,5],[6,7]])

print(numpy_matrix_multiplication(arr1,arr2))
endTime = time.time()
print('Time taken:',endTime - startTime)

[[40 49]
 [76 94]]
Time taken: 0.004260540008544922


After running this code for large matrices, we can clearly see that numpy matrix multiplication is reducing the computational time significantly!

#Question 3
Finding the highest element in a list requires one pass of the array. Finding the second highest element requires 2 passes of the the array. Using this method, what is the time complexity of finding the median of the array? Can you suggest a better method? Can you implement both these methods in Python and compare against numpy.median routine in terms of time?

In [11]:
#Finding the highest element in the array
def highest_element(arr):
    maxx = arr[0]
    n = len(arr)
    for i in range(0,n):
        if arr[i] > maxx:
            maxx = arr[i]

    return maxx

arr = np.array([1,2,43,56,576,68,8,6,5334,4,3212,4,455,6,78,-435,5,-67,5])
highest_element(arr)

5334

In [15]:
#Finding the second highest element with 2 passes
def second_highest(arr):
    maxx = float('-inf')  # Initialize maximum value
    second_maxx = float('-inf')  # Initialize second maximum value

    for num in arr:
        if num > maxx:
            maxx = num

    for num in arr:
        if num > second_maxx and num != maxx:
            second_maxx = num


    return second_maxx

arr = np.array([1,2,43,56,576,68,8,6,53,4,3212,4,455,6,78,-435,5,-67,5])
second_highest(arr)

576

In [59]:
start = time.time()

def find_median(arr):
    n = len(arr)
    if n%2==1:
        for j in range((n//2)+1):
            max_val = -1000000
            for i in arr:
                if i>max_val:
                    max_val = i

            arr = np.delete(arr, np.argwhere(arr==max_val))#Deletion by value
    else:
        for j in range((n//2)+1):
            max_val = -1000000
            for i in arr:
                if i>max_val:
                    max_val = i
            if j==(n/2)-1:
                max_val1 = max_val
            arr = np.delete(arr, np.argwhere(arr==max_val))#Deletion by value

    return (max_val1 + max_val)/2



# arr = np.array([2,43,56,576,68,8,6,5334,4,3212,4,455,6,78,-435,5,-67,5])
# arr = np.arange(1,7)
arr = np.random.random(10000)
userdefined_median = find_median(arr)

end = time.time()

print('My function result',userdefined_median, end = " ")
print('Time taken:', end-start,'seconds')


#Using Builtin np.median function
start = time.time()
builtin_median = np.median(arr)
end = time.time()

print('Builtin Function result',builtin_median, end = ' ')
print('Time taken:', end-start,'seconds')

My function result 0.5056535828822725 Time taken: 3.7345118522644043 seconds
Builtin Function result 0.5056535828822725 Time taken: 0.0005705356597900391 seconds


So, the time complexity of finding the median of an array, if we use two passes for finding the second highest element and one pass for highest one, is quadratic.



#Solution 4:
$$ {f(x,y) = x^2y+y^3\sin(x)} $$

Gradient of the above function f(x,y) is given by -

$$ {(2xy+y^3\cos(x))\vec{a_x}}+{(x^2+3y^2\sin (x))\vec{a_y}} $$

#Question 5
Use JAX to confirm the gradient evaluated by your method matches the analytical solution corresponding to a few random values of x and y.

In [57]:
import jax
import jax.numpy as jnp

# Define the function
def my_function(x, y):
    return x**2 * y + y**3 * jnp.sin(x)

def user_gradient(x, y):
    return np.array([ 2*x*y + y**3 * jnp.cos(x), x**2 + 3*y**2 * jnp.sin(x) ])

# Use JAX's grad function to compute the gradient
grad_my_function = jax.grad(my_function, argnums=(0, 1))

# Choose values for x and y (convert to float)
x_value = 2.0
y_value = 3.0

# gradient using analytical method
user_gradient = user_gradient(x_value, y_value)

# Evaluate the gradient at the point (x_value, y_value)
gradient_result = grad_my_function(x_value, y_value)
# Convert the JAX output to a NumPy array
gradient_result_np = np.array(gradient_result)

# Print the results
print("Gradients at random points using Analytical method:", user_gradient)
print("Gradients at random points using JAX:", gradient_result_np)

Gradients at random points using Analytical method: [ 0.7640352 28.55103  ]
Gradients at random points using JAX: [ 0.7640352 28.55103  ]


#Question 6
Use sympy to confirm that you obtain the same gradient analytically.

In [18]:
from sympy import symbols, diff, sin
x, y = symbols('x y')

# Define the function
f = x**2 * y + y**3 * sin(x)

# Computing the partial derivatives
df_dx = diff(f, x)
df_dy = diff(f, y)

gradient = (df_dx, df_dy)
print("Analytical Gradient using SymPy:", gradient)

Analytical Gradient using SymPy: (2*x*y + y**3*cos(x), x**2 + 3*y**2*sin(x))


#Question 9
Using matplotlib plot the following functions on the domain: x = 0.5 to 100.0 in steps of 0.5 -

In [None]:
#1.
x = np.linspace(0.5, 100.0, 200)
y = x
plt.plot(x,y, '-g')

In [None]:
#2
x = np.linspace(0.5, 100.0, 200)
y = x**2
plt.plot(x,y, '-g')

In [None]:
#3
x = np.linspace(0.5, 100.0, 200)
y = (x**3)/100
plt.plot(x,y, '-g')

In [None]:
#4
x = np.linspace(0.5, 100.0, 200)
y = np.sin(x)
plt.plot(x,y, '-g')

In [None]:
#5
x = np.linspace(0.5, 100.0, 200)
y = np.sin(x)/x
plt.plot(x,y, '-g')

In [None]:
#6
x = np.linspace(0.5, 100.0, 200)
y = np.log(x)
plt.plot(x,y, '-g')

In [None]:
#7
x = np.linspace(0.5, 100.0, 200)
y = np.exp(x)
plt.plot(x,y, '-g')

#Solution 7

In [30]:
dict = {2022:{'Branch 1':{'Roll Number':1,'Name':'N','Marks':{'Maths':100,'English':70,'Hindi':85}
                        },
          'Branch 2':{}},
        2023:{'Branch 1':{},'Branch 2':{}},
        2024:{'Branch 1':{},'Branch 2':{}},
        2025:{'Branch 1':{},'Branch 2':{}}
       }
print(dict)
#dict

{2022: {'Branch 1': {'Roll Number': 1, 'Name': 'N', 'Marks': {'Maths': 100, 'English': 70, 'Hindi': 85}}, 'Branch 2': {}}, 2023: {'Branch 1': {}, 'Branch 2': {}}, 2024: {'Branch 1': {}, 'Branch 2': {}}, 2025: {'Branch 1': {}, 'Branch 2': {}}}


{2022: {'Branch 1': {'Roll Number': 1,
   'Name': 'N',
   'Marks': {'Maths': 100, 'English': 70, 'Hindi': 85}},
  'Branch 2': {}},
 2023: {'Branch 1': {}, 'Branch 2': {}},
 2024: {'Branch 1': {}, 'Branch 2': {}},
 2025: {'Branch 1': {}, 'Branch 2': {}}}

#Question 8
Store the same information using Python classes. We have an overall database which is a list of year objects. Each year contains a list of branches. Each branch contains a list of students. Each student has some properties like name, roll number and has marks in some subjects.

#Question 10
Using numpy generate a matrix of size 20X5 containing random numbers drawn uniformly from the range of 1 to 2. Using Pandas create a dataframe out of this matrix. Name the columns of the dataframe as “a”, “b”, “c”, “d”, “e”. Find the column with the highest standard deviation. Find the row with the lowest mean.

In [33]:
import numpy as np
import pandas as pd

np.random.seed(42)
data = np.random.uniform(1, 2, size=(20, 5))

# Creating a Pandas DataFrame with named columns
df = pd.DataFrame(data, columns=['a', 'b', 'c', 'd', 'e'])
df

Unnamed: 0,a,b,c,d,e
0,1.37454,1.950714,1.731994,1.598658,1.156019
1,1.155995,1.058084,1.866176,1.601115,1.708073
2,1.020584,1.96991,1.832443,1.212339,1.181825
3,1.183405,1.304242,1.524756,1.431945,1.291229
4,1.611853,1.139494,1.292145,1.366362,1.45607
5,1.785176,1.199674,1.514234,1.592415,1.04645
6,1.607545,1.170524,1.065052,1.948886,1.965632
7,1.808397,1.304614,1.097672,1.684233,1.440152
8,1.122038,1.495177,1.034389,1.90932,1.25878
9,1.662522,1.311711,1.520068,1.54671,1.184854


In [34]:
# Finding the column with the highest standard deviation
highest_std_column = df.std().idxmax()
print(f"Column with the highest standard deviation: {highest_std_column}")

# Finding the row with the lowest mean
lowest_mean_row = df.mean(axis=1).idxmin()
print(f"Row with the lowest mean: {lowest_mean_row}")

Column with the highest standard deviation: c
Row with the lowest mean: 11


#Question 11
Add a new column to the dataframe called “f” which is the sum of the columns “a”, “b”, “c”, “d”, “e”. Create another column called “g”. The value in the column “g” should be “LT8” if the value in the column “f” is less than 8 and “GT8” otherwise. Find the number of rows in the dataframe where the value in the column “g” is “LT8”. Find the standard deviation of the column “f” for the rows where the value in the column “g” is “LT8” and “GT8” respectively.

In [35]:
# Using above dataframe - we need to add 'f' col as sum of all other columns
df['f'] = df['a'] + df['b'] + df['c'] + df['d'] + df['e']
df

Unnamed: 0,a,b,c,d,e,f
0,1.37454,1.950714,1.731994,1.598658,1.156019,7.811925
1,1.155995,1.058084,1.866176,1.601115,1.708073,7.389442
2,1.020584,1.96991,1.832443,1.212339,1.181825,7.217101
3,1.183405,1.304242,1.524756,1.431945,1.291229,6.735577
4,1.611853,1.139494,1.292145,1.366362,1.45607,6.865923
5,1.785176,1.199674,1.514234,1.592415,1.04645,7.137949
6,1.607545,1.170524,1.065052,1.948886,1.965632,7.757638
7,1.808397,1.304614,1.097672,1.684233,1.440152,7.335069
8,1.122038,1.495177,1.034389,1.90932,1.25878,6.819704
9,1.662522,1.311711,1.520068,1.54671,1.184854,7.225866


In [36]:
# In order to add 'g' col according to the question
g = np.array(['LT8' if df['f'][i]< 8 else 'GT8' for i in range(0,20)])
g
df['g'] = g
df

Unnamed: 0,a,b,c,d,e,f,g
0,1.37454,1.950714,1.731994,1.598658,1.156019,7.811925,LT8
1,1.155995,1.058084,1.866176,1.601115,1.708073,7.389442,LT8
2,1.020584,1.96991,1.832443,1.212339,1.181825,7.217101,LT8
3,1.183405,1.304242,1.524756,1.431945,1.291229,6.735577,LT8
4,1.611853,1.139494,1.292145,1.366362,1.45607,6.865923,LT8
5,1.785176,1.199674,1.514234,1.592415,1.04645,7.137949,LT8
6,1.607545,1.170524,1.065052,1.948886,1.965632,7.757638,LT8
7,1.808397,1.304614,1.097672,1.684233,1.440152,7.335069,LT8
8,1.122038,1.495177,1.034389,1.90932,1.25878,6.819704,LT8
9,1.662522,1.311711,1.520068,1.54671,1.184854,7.225866,LT8


In [38]:
# In order to find the no of rows where value in g column is 'LT8'
cnt = 0
for i in df['g']:
    if i =='LT8':
        cnt = cnt + 1
print(cnt)

18


In [40]:
# In order to find standard deviation of 'f' column for which row value in g column is 'LT8' and 'GT8' respectively
list_lt8 = []
list_gt8= []

#Using dictionary convention because of the dataframe
for key, value in df['g'].items():
    if (value =='LT8'):
        list_lt8.append(key)
    elif(value =='GT8'):
        list_gt8.append(key)

print(list_lt8)
print(list_gt8)

std_fColumn_lt8 = df['f'][list_lt8].std()
std_fColumn_gt8 = df['f'][list_gt8].std()

print("For LT8, Standard Deviation is:", std_fColumn_lt8)
print("For GT8, Standard Deviation is:", std_fColumn_gt8)

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 11, 12, 13, 14, 15, 16, 18, 19]
[10, 17]
For LT8, Standard Deviation is: 0.4025886500663061
For GT8, Standard Deviation is: 0.7956146881961926


#Question 12
Write a small piece of code to explain broadcasting in numpy.

Broadcasting is a property of Numpy library in python that allows the matrices of different shapes to be combined and perform various arithmatic operations like addition, multiplication etc. without creating multiple copies of data. It automatically align the dimensions of matrices and perform element-wise operation.

In [50]:
#First array
array1 = np.array([[1, 5, 3], [4, 10, 8], [7, 18, 9]])

#Second array
array2 = np.array([11, 24, 30])

res = array1 + array2
print(res)

[[12 29 33]
 [15 34 38]
 [18 42 39]]


#Question 13
Write a function to compute the argmin of a numpy array. The function should take a numpy array as input and return the index of the minimum element. You can use the np.argmin function to verify your solution.

In [46]:
def minElement_idx(arr):
  minn = arr[0]
  idx = 0
  n = len(arr)
  for i in range(0,n):
    if(arr[i] < minn):
      minn = arr[i]
      idx = i

  return idx
arr = np.array([5, 8, 2, -16, 7, 3, 9, 1,-11])

# Output of the function
index = minElement_idx(arr)
print("Index of minimum element with the above-mentioned function:", index)

# Verifying the output with np.argmin
argmin = np.argmin(arr)
print("Index of minimum element using np.argmin:", argmin)

Index of minimum element with the above-mentioned function: 3
Index of minimum element using np.argmin: 3
