In [None]:

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Load the dataset from a CSV path given relative to the working directory.
df = pd.read_csv(filepath_or_buffer='data/nptel.csv')

In [None]:
## Section 2: Initial Data Exploration

# Preview the first five rows (five is also head()'s default).
df.head(n=5)

In [None]:
# Report the dataset's dimensions; df.shape is a (rows, columns) pair.
n_records, n_columns = df.shape
print(f"Dataset shape: {df.shape}")
print(f"Number of records: {n_records}")
print(f"Number of columns: {n_columns}")

In [None]:
# Print pandas' built-in summary of the frame (per-column dtype and non-null
# count, plus overall memory usage).
df.info()

In [None]:
# Show every column label as a plain Python list, under a heading line.
print("Dataset columns:", df.columns.tolist(), sep="\n")

In [None]:
## Section 3: Data Selection and Filtering

# Narrow the frame to the three columns of interest, then preview the result.
columns_of_interest = ['Name', 'Course Name', 'Choice 1 City']
selected_columns = df[columns_of_interest]
print("Selected columns preview:")
selected_columns.head()

In [None]:
# Tally how many rows fall under each course name; value_counts() lists the
# most frequent course first.
print("Course enrollment distribution:")
course_counts = df.loc[:, 'Course Name'].value_counts()
print(course_counts)

In [None]:
# Count the rows belonging to the 'data_science' department.
# The dataset may not carry a 'Department' column at all, so guard the lookup
# and report its absence instead of raising a KeyError.
has_department = 'Department' in df.columns
if not has_department:
    print("Department column not found in dataset")
else:
    data_science_records = df.loc[df['Department'] == 'data_science']
    print(f"Number of Data Science department records: {len(data_science_records)}")

In [None]:
# Keep only the rows whose 'Choice 1 City' is exactly 'Hyderabad',
# report how many there are, and preview a few of them.
is_hyderabad = df['Choice 1 City'].eq('Hyderabad')
hyderabad_registrations = df.loc[is_hyderabad]
print(f"Number of registrations for Hyderabad: {hyderabad_registrations.shape[0]}")
print("\nSample records:")
hyderabad_registrations.head()

In [None]:
## Section 4: Data Quality Analysis

# Flag rows where 'Amount' is either absent (NaN) or exactly zero,
# then report the count and preview the offending rows.
amount = df['Amount']
amount_missing_or_zero = amount.isna() | amount.eq(0)
missing_or_zero_amounts = df.loc[amount_missing_or_zero]
print(f"Records with missing or zero amounts: {missing_or_zero_amounts.shape[0]}")
missing_or_zero_amounts.head()

In [None]:
# Parse the 'Exam Date' column into datetime values. The result is held in
# `temp`; the next cell assigns it back onto the frame.
# NOTE(review): no explicit `format=` is passed, so pandas infers the layout —
# confirm the inferred day/month order matches the source data.
temp=pd.to_datetime(df['Exam Date'])

In [None]:
# Overwrite 'Exam Date' in place with the parsed values held in `temp`
# (produced by the preceding cell). This mutates `df` for every later cell.
df['Exam Date']=temp

In [None]:
# Shape of the subset of rows whose exam date is strictly after the cutoff.
cutoff = '2025-07-01'
df.loc[df['Exam Date'].gt(cutoff)].shape

In [None]:
# Display the 'Exam Date' values in ascending order (sort_values' default);
# the original index labels are kept alongside each value.
df['Exam Date'].sort_values()

In [None]:
# Display the 'Amount' values from largest to smallest.
df['Amount'].sort_values(ascending=False)

In [None]:
# Display the frame ordered by course name, with ties broken by first-choice
# city. sort_values returns a new frame, so `df` itself is left untouched.
sort_keys = ['Course Name', 'Choice 1 City']
df.sort_values(by=sort_keys)

In [None]:
# Element-wise missing-value mask: True wherever a cell is null.
df.isna()

In [None]:
# Shape of the subset of rows that have no 'PWD status' value.
pwd_status_missing = df['PWD status'].isna()
df.loc[pwd_status_missing].shape


In [None]:
# Shape of the subset of rows that have no 'Choice 1 City' value.
city_missing = df['Choice 1 City'].isna()
df.loc[city_missing].shape

In [None]:
# Number of rows that have no 'Course Name' value.
course_missing = df['Course Name'].isna()
len(df.loc[course_missing])

In [None]:
# Rows per course name, most frequent first. This is the same tally that the
# earlier "Course enrollment distribution" cell stores in `course_counts`.
df['Course Name'].value_counts()

In [None]:
# Arithmetic mean of the 'Amount' column (pandas skips NaN values by default).
df['Amount'].mean()

In [None]:
# Rows per first-choice city, most frequent first.
df['Choice 1 City'].value_counts()

In [None]:
# Missing-value total for each column: build the null mask, then sum it
# column-wise (True counts as 1).
df.isna().sum()

In [None]:
import matplotlib.pyplot as plt
import numpy as np
# Line-plot demo: two straight two-point segments drawn on the same axes.
plt.title('Line Plot', loc='center', fontsize=20)
plt.xlabel('X-axis')
plt.ylabel('Y-axis')

# Line 1: dashed green segment from (0, 0) to (10, 100).
X_points = np.array([0, 10])
Y_points = np.array([0, 100])
plt.plot(X_points, Y_points, label='Line 1', color='green', linestyle='dashed', linewidth=4.5)

# Line 2: dotted red segment from (5, 30) to (20, 80).
X_points = np.array([5, 20])
Y_points = np.array([30, 80])
plt.plot(X_points, Y_points, label='Line 2', color='red', linestyle='dotted', linewidth=4.5)

plt.grid(axis='x')  # grid lines at the x ticks only (i.e. vertical lines)
plt.legend()
plt.show()

In [None]:
import matplotlib.pyplot as plt
import numpy as np
# Line-plot demo with markers: two noisy three-point series on shared axes.
#
# Each coordinate is drawn from a normal distribution centred on the listed
# value with the default standard deviation of 1.0. Passing a 3-tuple as `loc`
# yields one sample per mean, i.e. an array of three points.
#
# The samples come from a locally seeded generator so the figure is identical
# on every run (including Restart & Run All) and the global NumPy random state
# is left untouched.
rng = np.random.default_rng(42)

# Line 1: dashed green line, round markers with blue edges.
X_points = rng.normal(loc=(19, 20, 30))
Y_points = rng.normal(loc=(100, 10, 200))
plt.xlabel('X-axis')
plt.ylabel('Y-axis')
plt.title('Line Plot', fontsize=20, loc='center')
plt.plot(
    X_points,
    Y_points,
    linestyle='dashed',
    color='green',
    linewidth=4.5,
    label='Line 1',
    marker='o',
    markeredgecolor='blue',
)

# Line 2: dotted red line, diamond markers with yellow edges.
X_points = rng.normal(loc=(10, 5, 7))
Y_points = rng.normal(loc=(11, 12, 13))
plt.plot(
    X_points,
    Y_points,
    linestyle='dotted',
    color='red',
    linewidth=4.5,
    label='Line 2',
    marker='D',
    markeredgecolor='yellow',
)

plt.grid(axis='x')  # grid lines at the x ticks only (i.e. vertical lines)
plt.legend()
plt.show()

In [None]:
# Horizontal bar chart demo: one bar per category, each in its own colour.
xp = np.array(['x', 'y', 'z'])  # category names (barh places these on the vertical axis)
yp = np.array([1, 2, 3])        # bar lengths
colors = ['red', 'green', 'blue']

# The colour names double as the per-bar legend labels.
bar_style = dict(color=colors, label=colors, edgecolor='black', height=0.4)
plt.barh(xp, yp, **bar_style)

plt.title('Bar Plot', loc='left', fontsize=20)
plt.xlabel('X-axis')
plt.ylabel('Y-axis')
plt.xticks(rotation=45)
plt.grid()
plt.legend()
plt.show()

In [None]:
# Pie chart demo: four wedges, with the first one pulled out of the pie.
sizes = np.array([20, 30, 25, 25])
labels = np.array(['A', 'B', 'C', 'D'])
colors = np.array(['red', 'green', 'blue', 'orange'])
explode = (0.2, 0, 0, 0)  # offset for each wedge; only the first is non-zero

plt.pie(
    sizes,
    labels=labels,
    colors=colors,
    explode=explode,
    autopct='%1.1f%%',      # percentage text with one decimal place
    pctdistance=0.7,
    labeldistance=1.1,
    startangle=90,
    shadow=True,
    wedgeprops={'edgecolor': 'black'},
)
plt.title('Pie Chart', loc='center', fontsize=20, color='red')
plt.legend(loc='lower right', title='Categories')
plt.show()

In [None]:
# Histogram demo: 250 samples from a normal distribution with mean 15 and
# standard deviation 20, binned with plt.hist's default settings.
#
# The samples come from a locally seeded generator so the histogram is
# identical on every run and the global NumPy random state is left untouched.
rng = np.random.default_rng(42)
x_points = rng.normal(loc=15, scale=20, size=250)

plt.hist(x_points)
plt.xlabel('X-axis')
plt.ylabel('Y-axis')
plt.title('Histogram', fontsize=20, loc='center')
plt.grid()
plt.show()