In [None]:
# Dataset: synthetic placement records with columns cgpa, package, placement.
import numpy as np

# Fixed seed so every run regenerates exactly the same records.
np.random.seed(42)

# NOTE: the three random draws below must stay in this order, otherwise the
# seeded stream is consumed differently and the data changes.
cgpa = np.random.uniform(5.0, 10.0, 100).round(2)

# A student counts as placed when CGPA plus unit Gaussian noise exceeds 7.0.
noisy_cgpa = cgpa + np.random.normal(0, 1, 100)
placement = (noisy_cgpa > 7.0).astype(int)

# Placed students get CGPA scaled by a random factor drawn from [0.5, 1.2);
# students who were not placed get a package of 0.0.
offered_package = np.round(cgpa * np.random.uniform(0.5, 1.2, 100), 2)
package = np.where(placement == 1, offered_package, 0.0)

# One row per student; the integer placement flag is promoted to float here.
dataset = np.column_stack([cgpa, package, placement])

# comments='' keeps the header line free of the default "# " prefix so it
# can be read back as plain CSV column names.
np.savetxt("placement_multi_linear_reg.csv", dataset, delimiter=",", header="cgpa,package,placement", comments='')

print(dataset[:50])


[[ 6.87  0.    0.  ]
 [ 9.75  8.31  1.  ]
 [ 8.66  7.83  1.  ]
 [ 7.99  0.    0.  ]
 [ 5.78  0.    0.  ]
 [ 5.78  0.    0.  ]
 [ 5.29  0.    0.  ]
 [ 9.33  4.82  1.  ]
 [ 8.01  7.62  1.  ]
 [ 8.54  5.33  1.  ]
 [ 5.1   0.    0.  ]
 [ 9.85 11.5   1.  ]
 [ 9.16 10.45  1.  ]
 [ 6.06  0.    0.  ]
 [ 5.91  0.    0.  ]
 [ 5.92  0.    0.  ]
 [ 6.52  0.    0.  ]
 [ 7.62  8.97  1.  ]
 [ 7.16  0.    0.  ]
 [ 6.46  0.    0.  ]
 [ 8.06  5.69  1.  ]
 [ 5.7   0.    0.  ]
 [ 6.46  0.    0.  ]
 [ 6.83  0.    0.  ]
 [ 7.28  0.    0.  ]
 [ 8.93  7.95  1.  ]
 [ 6.    0.    0.  ]
 [ 7.57  0.    0.  ]
 [ 7.96  7.16  1.  ]
 [ 5.23  0.    0.  ]
 [ 8.04  7.48  1.  ]
 [ 5.85  0.    0.  ]
 [ 5.33  0.    0.  ]
 [ 9.74  8.4   1.  ]
 [ 9.83 10.95  1.  ]
 [ 9.04  9.21  1.  ]
 [ 6.52  0.    0.  ]
 [ 5.49  5.44  1.  ]
 [ 8.42  6.33  1.  ]
 [ 7.2   5.08  1.  ]
 [ 5.61  0.    0.  ]
 [ 7.48  0.    0.  ]
 [ 5.17  0.    0.  ]
 [ 9.55 10.88  1.  ]
 [ 6.29  5.4   1.  ]
 [ 8.31  7.07  1.  ]
 [ 6.56  6.95  1.  ]
 [ 7.6   0.  

In [None]:
import pandas as pd

# Read back the CSV written by the dataset cell. That cell saves the file
# relative to the current working directory, so load it from the same relative
# path rather than a hard-coded "/content/..." location, which only exists on
# Colab and raises FileNotFoundError anywhere else.
df = pd.read_csv("placement_multi_linear_reg.csv")
df

Unnamed: 0,cgpa,package,placement
0,6.87,0.00,0.0
1,9.75,8.31,1.0
2,8.66,7.83,1.0
3,7.99,0.00,0.0
4,5.78,0.00,0.0
...,...,...,...
95,7.47,0.00,0.0
96,7.61,6.76,1.0
97,7.14,6.63,1.0
98,5.13,0.00,0.0


## Question 1: Basic Array Operations

In [None]:
# `data` is used by this and later questions but was never defined in the
# notebook, so a fresh kernel (Restart & Run All) failed with a NameError.
# Build it from the DataFrame loaded from the CSV; the column order follows
# the CSV header: cgpa, package, placement.
data = df.to_numpy()

# Column views used throughout the remaining questions.
cgpa = data[:, 0]
package = data[:, 1]

# np.std defaults to the population standard deviation (ddof=0).
cgpa_mean = np.mean(cgpa)
cgpa_std = np.std(cgpa)
package_mean = np.mean(package)
package_std = np.std(package)

print("First 5 CGPA:", cgpa[:5])
print("First 5 Packages:", package[:5])
print("CGPA - Mean:", cgpa_mean, "Std Dev:", cgpa_std)
print("Package - Mean:", package_mean, "Std Dev:", package_std)



Question 1:
First 5 CGPA: [6.87 9.75 8.66 7.99 5.78]
First 5 Packages: [0.   8.31 7.83 0.   0.  ]
CGPA - Mean: 7.3507 Std Dev: 1.4800792242309193
Package - Mean: 3.8651 Std Dev: 3.8836347652682277


## Question 2: Filtering and Counting

In [None]:
# The third column of `data` holds the placement flag (1 = placed).
placement = data[:, 2]

# Keep only the rows where the student scored above 7.0 AND was placed.
mask = np.logical_and(cgpa > 7.0, placement == 1)
filtered = data[mask]

print("Number of students with CGPA > 7.0 and placed:", len(filtered))
print("First 3 rows of filtered data:\n", filtered[:3])



Question 2:
Number of students with CGPA > 7.0 and placed: 46
First 3 rows of filtered data:
 [[9.75 8.31 1.  ]
 [8.66 7.83 1.  ]
 [9.33 4.82 1.  ]]


## Question 3: Data Normalization

In [None]:
# Min-max scaling: (x - min) / (max - min) maps the packages linearly onto [0, 1].
min_p = package.min()
max_p = package.max()
package_range = max_p - min_p
normalized_package = (package - min_p) / package_range

print("First 5 normalized package values:", normalized_package[:5])
print("Normalized package range: Min =", normalized_package.min(), ", Max =", normalized_package.max())


First 5 normalized package values: [0.         0.7226087  0.68086957 0.         0.        ]
Normalized package range: Min = 0.0 , Max = 1.0


## Question 4: Correlation Analysis

In [None]:
# Pearson correlation. np.corrcoef returns a 2x2 matrix whose diagonal is 1
# and whose off-diagonal entries hold the CGPA-package coefficient.
corr_matrix = np.corrcoef(cgpa, package)
cgpa_package_corr = corr_matrix[0, 1]

print("Correlation matrix:\n", corr_matrix)
print("Interpretation: CGPA and package have a correlation coefficient of", cgpa_package_corr)

Correlation matrix:
 [[1.         0.78220725]
 [0.78220725 1.        ]]
Interpretation: CGPA and package have a correlation coefficient of 0.7822072504928574


## Question 5: Grouping and Aggregation

In [None]:
# Boolean selectors for the two placement groups.
placed_mask = placement == 1
not_placed_mask = placement == 0

# Mean package within each group.
avg_package_placed = package[placed_mask].mean()
avg_package_not_placed = package[not_placed_mask].mean()

# Round only for display; the unrounded averages stay available above.
placed_display = round(avg_package_placed, 2)
not_placed_display = round(avg_package_not_placed, 2)

print("Average package (placed):", placed_display)
print("Average package (not placed):", not_placed_display)



Question 5:
Average package (placed): 7.29
Average package (not placed): 0.0


### Question 6: Array Manipulation

In [None]:
# Select students with CGPA above 6.5 and keep only the cgpa and package columns.
high_cgpa_mask = cgpa > 6.5
cgpa_package_columns = [0, 1]
filtered_data = data[high_cgpa_mask][:, cgpa_package_columns]

# argsort is ascending, so sort on the negated package to get descending order.
package_column = filtered_data[:, 1]
sorted_indices = np.argsort(-package_column)
sorted_filtered = filtered_data[sorted_indices]

print("Top 5 rows sorted by package:\n", sorted_filtered[:5])

Top 5 rows sorted by package:
 [[ 9.85 11.5 ]
 [ 9.85 11.06]
 [ 9.83 10.95]
 [ 9.55 10.88]
 [ 9.16 10.45]]


### Question 7: Binning CGPA

In [None]:
# Bin edges for np.digitize (default right=False): a value x lands in bin i
# when bins[i-1] <= x < bins[i]. CGPA values are assumed to be non-negative.
bins = [0, 6.0, 7.5, np.inf]
labels = ['Low (<6.0)', 'Medium (6.0-7.5)', 'High (>=7.5)']

# categories: 0-Low, 1-Medium, 2-High
categories = np.digitize(cgpa, bins) - 1

# Count with bincount so that every label gets an entry, including 0 for an
# empty bin. np.unique(..., return_counts=True) drops empty categories, which
# would silently pair the remaining counts with the wrong labels when zipped.
counts = np.bincount(categories, minlength=len(labels))
unique = np.unique(categories)  # category indices that actually occur

print("\nQuestion 7:")
for label, count in zip(labels, counts):
    print(f"{label}: {count} students")



Question 7:
Low (<6.0): 27 students
Medium (6.0-7.5): 26 students
High (>=7.5): 47 students


### Question 8: Missing Value Handling

In [None]:
# Work on a copy so the original `package` array is left untouched.
package_with_nan = package.copy()

# Knock out 10% of the entries at distinct, randomly chosen positions.
num_missing = int(0.1 * len(package))
missing_indices = np.random.choice(len(package), size=num_missing, replace=False)
package_with_nan[missing_indices] = np.nan

# Impute: fill every NaN with the median of the values that are still present
# (nanmedian ignores the NaN entries when computing the median).
median_value = np.nanmedian(package_with_nan)
nan_positions = np.isnan(package_with_nan)
package_with_nan = np.where(nan_positions, median_value, package_with_nan)

print("Missing values introduced:", num_missing)
print("First 5 values of modified package array:", package_with_nan[:5])


Missing values introduced: 10
First 5 values of modified package array: [0.   8.31 7.83 0.   0.  ]


### Sokaler Nastha: Placed Student Farthest from the Mean

In [None]:

# Placed students only, restricted to the (cgpa, package) columns.
cgpa_package_columns = [0, 1]
placed_data = data[placement == 1][:, cgpa_package_columns]

# Euclidean distance of every placed student from the group's mean point.
mean_point = placed_data.mean(axis=0)
offsets = placed_data - mean_point
distances = np.linalg.norm(offsets, axis=1)

# The student farthest from the mean point.
max_idx = distances.argmax()
max_distance = distances[max_idx]
student_info = placed_data[max_idx]

farthest_cgpa, farthest_package = student_info
print("Maximum distance from mean:", max_distance)
print("Student with max distance - CGPA:", farthest_cgpa, ", Package:", farthest_package)

Maximum distance from mean: 4.450332566242684
Student with max distance - CGPA: 9.85 , Package: 11.5
