In [1]:
import numpy as np

# Step 1: Create a 1D NumPy array with values from 1 to 20
arr = np.arange(1, 21)
print("Array:", arr)

# Step 2: Find all prime numbers in the array
# A prime number is greater than 1 and divisible only by 1 and itself
prime_numbers = []  # empty list to store primes

for num in arr:
    if num > 1:  # primes are greater than 1
        is_prime = True  # assume number is prime
        for i in range(2, int(num**0.5) + 1):
            if num % i == 0:
                is_prime = False  # found a divisor, not prime
                break
        if is_prime:
            prime_numbers.append(num)

prime_numbers = np.array(prime_numbers)  # convert list to NumPy array
print("Prime numbers:", prime_numbers)

# Step 3: Compute mean and variance of the prime numbers
mean_primes = np.mean(prime_numbers)
variance_primes = np.var(prime_numbers)

print("Mean of primes:", mean_primes)
print("Variance of primes:", variance_primes)


Array: [ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20]
Prime numbers: [ 2  3  5  7 11 13 17 19]
Mean of primes: 9.625
Variance of primes: 35.734375


In [2]:
import numpy as np

# Step (a): Create a 4x4 array with numbers 1 to 16
arr = np.arange(1, 17).reshape(4, 4)
print("4x4 Array:\n", arr)

# Step (b): Extract the 2x2 bottom-left sub-matrix
# Bottom-left means last 2 rows, first 2 columns
sub_matrix = arr[2:4, 0:2]
print("\n2x2 Bottom-left Sub-matrix:\n", sub_matrix)

# Step (c): Compute the determinant of the sub-matrix
determinant = np.linalg.det(sub_matrix)
print("\nDeterminant of sub-matrix:", determinant)


4x4 Array:
 [[ 1  2  3  4]
 [ 5  6  7  8]
 [ 9 10 11 12]
 [13 14 15 16]]

2x2 Bottom-left Sub-matrix:
 [[ 9 10]
 [13 14]]

Determinant of sub-matrix: -4.0000000000000036


In [3]:
import pandas as pd

# Step (a): Create a DataFrame with 5 students and their marks
# Each student has marks in Math, Science, and English
students = ['Alice', 'Bob', 'Charlie', 'David', 'Eva']
math_marks = [85, 92, 78, 90, 88]
science_marks = [80, 89, 84, 95, 91]
english_marks = [75, 94, 82, 88, 86]

# Combine into a dictionary for pandas
data = {
    'Name': students,
    'Math': math_marks,
    'Science': science_marks,
    'English': english_marks
}

# Create the DataFrame
df = pd.DataFrame(data)
print("Step 1: Original DataFrame\n", df)

# Step (b): Add columns for Total and Average marks
# Total = sum of marks in all subjects
df['Total'] = df['Math'] + df['Science'] + df['English']

# Average = Total divided by number of subjects
df['Average'] = df['Total'] / 3

print("\nStep 2: DataFrame with Total and Average\n", df)

# Step (c): Identify the topper
# Find the row with the highest average
topper = df[df['Average'] == df['Average'].max()]

# Print the topper's name and average
print("\nStep 3: Topper Details")
print("Name:", topper['Name'].values[0])
print("Average Marks:", topper['Average'].values[0])


Step 1: Original DataFrame
       Name  Math  Science  English
0    Alice    85       80       75
1      Bob    92       89       94
2  Charlie    78       84       82
3    David    90       95       88
4      Eva    88       91       86

Step 2: DataFrame with Total and Average
       Name  Math  Science  English  Total    Average
0    Alice    85       80       75    240  80.000000
1      Bob    92       89       94    275  91.666667
2  Charlie    78       84       82    244  81.333333
3    David    90       95       88    273  91.000000
4      Eva    88       91       86    265  88.333333

Step 3: Topper Details
Name: Bob
Average Marks: 91.66666666666667


In [4]:
import numpy as np

# Step (a): Simulate 1000 coin tosses
# 1 = Head, 0 = Tail
tosses = np.random.randint(0, 2, size=1000)  # generates 1000 random 0s or 1s
print("First 20 tosses:", tosses[:20])  # just to see a small sample

# Step (b): Count frequency of heads and tails
heads_count = np.sum(tosses)        # sum of 1s gives number of heads
tails_count = 1000 - heads_count    # total tosses - heads gives tails

print("\nHeads:", heads_count)
print("Tails:", tails_count)

# Step (c): Estimate probability of heads
prob_heads = heads_count / 1000
print("\nEstimated Probability of Heads:", prob_heads)

# Check if it's close to 0.5
if abs(prob_heads - 0.5) < 0.05:
    print("Yes, it is close to 0.5 due to randomness over many tosses.")
else:
    print("It may not be exactly 0.5 in a single simulation, but should be close on average.")


First 20 tosses: [0 1 1 1 0 0 0 1 1 0 0 0 0 0 1 1 0 1 0 0]

Heads: 496
Tails: 504

Estimated Probability of Heads: 0.496
Yes, it is close to 0.5 due to randomness over many tosses.


In [5]:
import pandas as pd

# Step (a): Create a DataFrame of employees
data = {
    'ID': [101, 102, 103, 104, 105],
    'Name': ['Alice', 'Bob', 'Charlie', 'David', 'Eva'],
    'Salary': [50000, 60000, 55000, 70000, 65000]
}

df = pd.DataFrame(data)
print("Step 1: Original DataFrame\n", df)

# Step (b): Add a Bonus column = 10% of Salary
df['Bonus'] = df['Salary'] * 0.10
print("\nStep 2: DataFrame with Bonus\n", df)

# Step (c): Display employees with salary above average
average_salary = df['Salary'].mean()  # calculate average salary
above_average = df[df['Salary'] > average_salary]  # filter employees

print("\nStep 3: Employees with Salary Above Average\n", above_average)


Step 1: Original DataFrame
     ID     Name  Salary
0  101    Alice   50000
1  102      Bob   60000
2  103  Charlie   55000
3  104    David   70000
4  105      Eva   65000

Step 2: DataFrame with Bonus
     ID     Name  Salary   Bonus
0  101    Alice   50000  5000.0
1  102      Bob   60000  6000.0
2  103  Charlie   55000  5500.0
3  104    David   70000  7000.0
4  105      Eva   65000  6500.0

Step 3: Employees with Salary Above Average
     ID   Name  Salary   Bonus
3  104  David   70000  7000.0
4  105    Eva   65000  6500.0


In [6]:
import numpy as np

# Step (a): Create a 3x3 array with values 1 to 9
A = np.arange(1, 10).reshape(3, 3)
print("Step 1: Original 3x3 Array A:\n", A)

# Step (b): Find transpose and inverse
A_transpose = A.T
print("\nStep 2: Transpose of A:\n", A_transpose)

# Inverse exists only if determinant is not zero
determinant = np.linalg.det(A)
print("\nDeterminant of A:", determinant)

if determinant != 0:
    A_inv = np.linalg.inv(A)
    print("\nInverse of A:\n", A_inv)
else:
    print("\nA is singular, inverse does not exist.")

# Step (c): Verify that A × A⁻¹ ≈ Identity
if determinant != 0:
    identity_check = np.dot(A, A_inv)
    print("\nA × A⁻¹:\n", identity_check)

    # Check if it is approximately equal to identity matrix
    print("\nIs approximately identity matrix:", np.allclose(identity_check, np.eye(3)))


Step 1: Original 3x3 Array A:
 [[1 2 3]
 [4 5 6]
 [7 8 9]]

Step 2: Transpose of A:
 [[1 4 7]
 [2 5 8]
 [3 6 9]]

Determinant of A: -9.51619735392994e-16

Inverse of A:
 [[ 3.15251974e+15 -6.30503948e+15  3.15251974e+15]
 [-6.30503948e+15  1.26100790e+16 -6.30503948e+15]
 [ 3.15251974e+15 -6.30503948e+15  3.15251974e+15]]

A × A⁻¹:
 [[ 0.   1.  -0.5]
 [ 0.   2.  -1. ]
 [ 0.   3.   2.5]]

Is approximately identity matrix: False


In [7]:
import numpy as np

# -----------------------------
# Step (a): Simulate 500 dice rolls
# -----------------------------
# Each roll is a random integer between 1 and 6
num_rolls = 500
dice_rolls = np.random.randint(1, 7, size=num_rolls)

# Show a small sample of the rolls
print("Sample of first 20 dice rolls:", dice_rolls[:20])
print(f"Total number of dice rolls simulated: {num_rolls}\n")

# -----------------------------
# Step (b): Count frequency of each face
# -----------------------------
# Initialize a dictionary to store counts
face_counts = {}

# Loop through each possible face (1 to 6)
for face in range(1, 7):
    count = np.sum(dice_rolls == face)  # count how many times 'face' appears
    face_counts[face] = count

# Print counts for each face
print("Face\tCount")
for face, count in face_counts.items():
    print(f"{face}\t{count}")

# -----------------------------
# Step (c): Compute relative frequencies and compare with theoretical probability
# -----------------------------
print("\nFace\tCount\tRelative Frequency\tTheoretical Probability")
for face in range(1, 7):
    count = face_counts[face]
    relative_freq = count / num_rolls      # probability estimate
    theoretical_prob = 1/6                 # theoretical probability for fair dice
    print(f"{face}\t{count}\t{relative_freq:.3f}\t\t{theoretical_prob:.3f}")

# Optional: show total of relative frequencies
total_relative_freq = sum(face_counts[face] / num_rolls for face in range(1, 7))
print(f"\nSum of relative frequencies: {total_relative_freq:.3f} (should be close to 1.0)")


Sample of first 20 dice rolls: [1 2 1 4 3 4 1 3 6 4 6 1 1 2 1 6 6 3 3 4]
Total number of dice rolls simulated: 500

Face	Count
1	85
2	71
3	81
4	88
5	89
6	86

Face	Count	Relative Frequency	Theoretical Probability
1	85	0.170		0.167
2	71	0.142		0.167
3	81	0.162		0.167
4	88	0.176		0.167
5	89	0.178		0.167
6	86	0.172		0.167

Sum of relative frequencies: 1.000 (should be close to 1.0)


In [8]:
import pandas as pd

# Step (a): Create a DataFrame with 6 products
data = {
    'Name': ['Apple', 'Banana', 'Orange', 'Mango', 'Grapes', 'Pineapple'],
    'Quantity': [10, 15, 12, 8, 20, 5],
    'Price': [50, 20, 30, 60, 40, 80]  # price per unit
}

df = pd.DataFrame(data)
print("Step 1: Original Product DataFrame\n", df)

# Step (b): Add a column "Total = Quantity × Price"
df['Total'] = df['Quantity'] * df['Price']
print("\nStep 2: DataFrame with Total Sales\n", df)

# Step (c): Find the product that generated maximum sales revenue
max_total_row = df.loc[df['Total'].idxmax()]  # row with max Total
print("\nStep 3: Product with Maximum Sales Revenue")
print("Product Name:", max_total_row['Name'])
print("Total Sales Revenue:", max_total_row['Total'])


Step 1: Original Product DataFrame
         Name  Quantity  Price
0      Apple        10     50
1     Banana        15     20
2     Orange        12     30
3      Mango         8     60
4     Grapes        20     40
5  Pineapple         5     80

Step 2: DataFrame with Total Sales
         Name  Quantity  Price  Total
0      Apple        10     50    500
1     Banana        15     20    300
2     Orange        12     30    360
3      Mango         8     60    480
4     Grapes        20     40    800
5  Pineapple         5     80    400

Step 3: Product with Maximum Sales Revenue
Product Name: Grapes
Total Sales Revenue: 800


In [None]:
"""(a) Create a DataFrame with some missing values (NaN). 
(b) Fill missing values with column mean. 
(c) Drop rows where more than 1 value is missing. """

import pandas as pd
import numpy as np

# (a) Create a DataFrame with some missing values
data = {
    'A': [1, 2, np.nan, 4, 5],
    'B': [5, np.nan, np.nan, 8, 10],
    'C': [10, 11, 12, np.nan, 14]
}
df = pd.DataFrame(data)
print("Original DataFrame:")
print(df)

# (b) Fill missing values with column mean
df_filled = df.fillna(df.mean())
print("\nDataFrame after filling NaN with column mean:")
print(df_filled)

# (c) Drop rows where more than 1 value is missing
df_dropped = df[df.isnull().sum(axis=1) <= 1]  # Keep rows with 1 or 0 NaNs
print("\nDataFrame after dropping rows with more than 1 missing value:")
print(df_dropped)


In [None]:
'''(a) Create a NumPy array with integers from 1 to 30. 
(b) Reshape it into a 5×6 matrix. 
(c) Extract all even numbers from the matrix and compute their average. '''


import numpy as np

# (a) Create a NumPy array with integers from 1 to 30
arr = np.arange(1, 31)
print("Original array:")
print(arr)

# (b) Reshape it into a 5×6 matrix
matrix = arr.reshape(5, 6)
print("\n5x6 matrix:")
print(matrix)

# (c) Extract all even numbers and compute their average
even_numbers = matrix[matrix % 2 == 0]  # Select numbers divisible by 2
average_even = even_numbers.mean()
print("\nEven numbers:")
print(even_numbers)
print("\nAverage of even numbers:", average_even)


In [9]:
'''Q13.  
(a) Create a DataFrame of 6 students with columns: Name, Age, Marks. 
(b) Select only students who scored above the overall average marks. 
(c) Display names of students younger than 20 whose marks are above 60. '''


import pandas as pd

# (a) Create a DataFrame of 6 students
data = {
    'Name': ['Alice', 'Bob', 'Charlie', 'David', 'Eva', 'Frank'],
    'Age': [19, 21, 18, 22, 20, 17],
    'Marks': [75, 55, 65, 80, 50, 70]
}
df = pd.DataFrame(data)
print("Original DataFrame:")
print(df)

# (b) Select students who scored above the overall average marks
average_marks = df['Marks'].mean()
above_average = df[df['Marks'] > average_marks]
print("\nStudents with marks above average:")
print(above_average)

# (c) Display names of students younger than 20 whose marks are above 60
filtered_students = df[(df['Age'] < 20) & (df['Marks'] > 60)]
names_list = filtered_students['Name'].tolist()
print("\nNames of students younger than 20 with marks > 60:")
print(names_list)


Original DataFrame:
      Name  Age  Marks
0    Alice   19     75
1      Bob   21     55
2  Charlie   18     65
3    David   22     80
4      Eva   20     50
5    Frank   17     70

Students with marks above average:
    Name  Age  Marks
0  Alice   19     75
3  David   22     80
5  Frank   17     70

Names of students younger than 20 with marks > 60:
['Alice', 'Charlie', 'Frank']


In [10]:
import pandas as pd

# (a) Create a DataFrame of 5 employees
data = {
    'Name': ['Alice', 'Bob', 'Charlie', 'David', 'Eva'],
    'Tasks_Completed': [50, 40, 60, 45, 55],
    'Hours_Worked': [10, 8, 12, 9, 11]
}
df = pd.DataFrame(data)
print("Original DataFrame:")
print(df)

# (b) Add a new column Efficiency = Tasks_Completed / Hours_Worked
df['Efficiency'] = df['Tasks_Completed'] / df['Hours_Worked']
print("\nDataFrame with Efficiency column:")
print(df)

# (c) Identify employee with the highest efficiency
max_eff_idx = df['Efficiency'].idxmax()  # Index of max efficiency
max_eff_employee = df.loc[max_eff_idx, 'Name']
max_eff_value = df.loc[max_eff_idx, 'Efficiency']

print(f"\nEmployee with highest efficiency: {max_eff_employee} ({max_eff_value})")


Original DataFrame:
      Name  Tasks_Completed  Hours_Worked
0    Alice               50            10
1      Bob               40             8
2  Charlie               60            12
3    David               45             9
4      Eva               55            11

DataFrame with Efficiency column:
      Name  Tasks_Completed  Hours_Worked  Efficiency
0    Alice               50            10         5.0
1      Bob               40             8         5.0
2  Charlie               60            12         5.0
3    David               45             9         5.0
4      Eva               55            11         5.0

Employee with highest efficiency: Alice (5.0)
