Installing Pandas

In [24]:
!pip install pandas

Defaulting to user installation because normal site-packages is not writeable


Load the dataset and display the first few rows

In [4]:
import pandas as pd

# Read the student marks CSV (relative path, resolved against the working
# directory) and preview its first five rows.
df = pd.read_csv("Student_Marks.csv")
print("First 5 Rows:", df.head(5), sep="\n")

First 5 Rows:
   number_courses  time_study   Marks
0               3       4.508  19.202
1               4       0.096   7.734
2               4       3.133  13.811
3               6       7.909  53.018
4               8       7.811  55.299


Check basic information

In [5]:
# Quick structural overview of the dataset: dimensions, per-column dtypes,
# and the full info summary (non-null counts, memory usage).
print("Shape of Dataset (rows, columns):")
print(df.shape)

print("\nColumn Data Types:")
print(df.dtypes)

print("\nFull Info:")
# DataFrame.info() writes its summary to stdout itself and returns None, so it
# must be called directly; wrapping it in print() adds a stray "None" line.
df.info()


Shape of Dataset (rows, columns):
(100, 3)

Column Data Types:
number_courses      int64
time_study        float64
Marks             float64
dtype: object

Full Info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 3 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   number_courses  100 non-null    int64  
 1   time_study      100 non-null    float64
 2   Marks           100 non-null    float64
dtypes: float64(2), int64(1)
memory usage: 2.5 KB
None


In [None]:
# Find and handle missing values

In [6]:
# Report how many entries are missing in each column before imputing anything.
missing_per_column = df.isnull().sum()
print("Missing Values in Each Column:", missing_per_column, sep="\n")

# Mean imputation: fill gaps in each numeric column with that column's mean.
column_means = df.mean(numeric_only=True)
df = df.fillna(column_means)


Missing Values in Each Column:
number_courses    0
time_study        0
Marks             0
dtype: int64


In [None]:
# Calculate the average, highest, and lowest values

In [7]:
# Per-column summary statistics over the numeric columns, printed as
# (heading, values) pairs. The leading "\n" in the later headings keeps a
# blank line between consecutive sections.
summaries = [
    ("Average values:", df.mean(numeric_only=True)),
    ("\nHighest values:", df.max(numeric_only=True)),
    ("\nLowest values:", df.min(numeric_only=True)),
]
for heading, values in summaries:
    print(heading)
    print(values)


Average values:
number_courses     5.29000
time_study         4.07714
Marks             24.41769
dtype: float64

Highest values:
number_courses     8.000
time_study         7.957
Marks             55.299
dtype: float64

Lowest values:
number_courses    3.000
time_study        0.096
Marks             5.609
dtype: float64


Project: Simple Sales Data Analysis
Step 1: Add a column "Total"
Step 2: Total sales calculation
Step 3: Best-selling product (by quantity)
Step 4: Basic Sales Report

In [11]:
# Load the sales records (relative path, resolved against the working
# directory) and preview the first five rows.
sales = pd.read_csv("sales.csv")
print(sales.head(n=5))

   Product   Quantity   Price
0       Pen         10      5
1  Notebook          5     20
2    Pencil         20     11
3  Sharpner         15      8
4     Scale          7      9


In [13]:
# Show the column labels exactly as parsed. The output reveals that 'Product '
# and 'Quantity ' carry a trailing space, so every later lookup must include it.
print(sales.columns)

Index(['Product ', 'Quantity ', 'Price'], dtype='object')


In [17]:
# Row revenue = units sold x unit price. The 'Quantity ' label keeps the
# trailing space it has in the loaded frame.
units_sold, unit_price = sales['Quantity '], sales['Price']
sales['Total'] = units_sold * unit_price
print(sales)

# Grand total of revenue across every row.
total_sales = sales['Total'].sum()
print("Total Sales:", total_sales)

   Product   Quantity   Price  Total
0       Pen         10      5     50
1  Notebook          5     20    100
2    Pencil         20     11    220
3  Sharpner         15      8    120
4     Scale          7      9     63
5      Book         12      4     48
6   Compass         10     25    250
7  Notebook         16     11    176
8    Pencil          6     11     66
Total Sales: 1093


In [19]:
# Sum the units sold for each product, then pick the product label whose
# summed quantity is largest.
quantity_by_product = sales.groupby('Product ')['Quantity '].sum()
best_product = quantity_by_product.idxmax()
print("Best Selling Product:", best_product)


Best Selling Product: Pencil


In [23]:
# Compute the per-product aggregates first so the report template below only
# interpolates ready-made values.
by_product = sales.groupby('Product ')
quantity_per_product = by_product['Quantity '].sum()
revenue_per_product = by_product['Total'].sum()

# Plain-text report combining the overall totals with the per-product tables.
report = f"""
SALES REPORT
-----------------------
Total Sales: {total_sales}

Best Selling Product: {best_product}

Total Quantity Sold per Product:
{quantity_per_product}

Total Revenue per Product:
{revenue_per_product}
"""

print(report)



SALES REPORT
-----------------------
Total Sales: 1093

Best Selling Product: Pencil

Total Quantity Sold per Product:
Product 
Book        12
Compass     10
Notebook    21
Pen         10
Pencil      26
Scale        7
Sharpner    15
Name: Quantity , dtype: int64

Total Revenue per Product:
Product 
Book         48
Compass     250
Notebook    276
Pen          50
Pencil      286
Scale        63
Sharpner    120
Name: Total, dtype: int64

