In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
import pandas as pd

# Sample data with mixed date formats
data = {
    'id': [1, 2, 3, 4, 5, 6],
    'raw_date': [
        '2025-10-06',     # ISO format
        '06/10/2025',     # European format (DD/MM/YYYY)
        '10/06/2025',     # US format (MM/DD/YYYY)
        'October 6, 2025',# Full month name
        '06-Oct-2025',    # Abbreviated month
        '20251006'        # Compact YYYYMMDD
    ]
}

# Create DataFrame
df = pd.DataFrame(data)

print(df)

   id         raw_date
0   1       2025-10-06
1   2       06/10/2025
2   3       10/06/2025
3   4  October 6, 2025
4   5      06-Oct-2025
5   6         20251006


In [3]:
df

Unnamed: 0,id,raw_date
0,1,2025-10-06
1,2,06/10/2025
2,3,10/06/2025
3,4,"October 6, 2025"
4,5,06-Oct-2025
5,6,20251006


# converting column into datetime

In [5]:
# 'df' DataFrame ke 'raw_date' column ko datetime format mein convert karo
# errors='coerce' ka matlab hai agar koi invalid date hai toh usko NaT (Not a Time) bana do
pd.to_datetime(df['raw_date'], errors='coerce')

0   2025-10-06
1          NaT
2          NaT
3          NaT
4          NaT
5          NaT
Name: raw_date, dtype: datetime64[ns]

In [9]:
# "raw_date" column ke index 1 se 2 tak ke values ko datetime format me convert kar rahe hain
a = pd.to_datetime(df["raw_date"][1:2], format='mixed')
# format='mixed' ka matlab hai ki agar dates alag-alag format me hain (jaise 'YYYY-MM-DD' aur 'DD/MM/YYYY'),
# to pandas unhe automatically detect karke sahi format me convert karega

In [10]:
a

1   2025-06-10
Name: raw_date, dtype: datetime64[ns]

In [12]:
a.dt.year# [translate:Is line se hum datetime object me se year ka part nikalte hain]
a.dt.year

1    2025
Name: raw_date, dtype: int32

In [14]:
a.dt.month # is se hum pandas Series ke datetime object me se month part ko extract karte hain
# Yani ki ye code datetime se month number (1 to 12) return karega

1    6
Name: raw_date, dtype: int32

In [15]:
a.dt.day

1    10
Name: raw_date, dtype: int32

In [16]:
a.dt.month_name()

1    June
Name: raw_date, dtype: object

In [17]:
import pandas as pd

# Sample sales data
data = {
    'sale_id': [101, 102, 103, 104, 105],
    'product': ['Widget', 'Gadget', 'Widget', 'Thingy', 'Gadget'],
    'quantity': [3, 5, 2, 7, 1],
    'price_per_unit': [20.0, 15.5, 20.0, 10.0, 15.5],
    'sale_date': [
        '2025-10-01',
        '2025-10-02',
        '2025-10-03',
        '2025-10-03',
        '2025-10-04'
    ]
}

# Create DataFrame
df = pd.DataFrame(data)

# Calculate total

In [18]:
df

Unnamed: 0,sale_id,product,quantity,price_per_unit,sale_date
0,101,Widget,3,20.0,2025-10-01
1,102,Gadget,5,15.5,2025-10-02
2,103,Widget,2,20.0,2025-10-03
3,104,Thingy,7,10.0,2025-10-03
4,105,Gadget,1,15.5,2025-10-04


In [19]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5 entries, 0 to 4
Data columns (total 5 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   sale_id         5 non-null      int64  
 1   product         5 non-null      object 
 2   quantity        5 non-null      int64  
 3   price_per_unit  5 non-null      float64
 4   sale_date       5 non-null      object 
dtypes: float64(1), int64(2), object(2)
memory usage: 332.0+ bytes


In [22]:
df['order_date'] = pd.to_datetime(df['sale_date'])  # yeh line 'sale_date' column ko datetime format mein convert
# karke naya column 'order_date' banati hai

In [26]:
df['order_date']

0   2025-10-01
1   2025-10-02
2   2025-10-03
3   2025-10-03
4   2025-10-04
Name: order_date, dtype: datetime64[ns]

In [27]:
df

Unnamed: 0,sale_id,product,quantity,price_per_unit,sale_date,order_date
0,101,Widget,3,20.0,2025-10-01,2025-10-01
1,102,Gadget,5,15.5,2025-10-02,2025-10-02
2,103,Widget,2,20.0,2025-10-03,2025-10-03
3,104,Thingy,7,10.0,2025-10-03,2025-10-03
4,105,Gadget,1,15.5,2025-10-04,2025-10-04


In [28]:
df['order_date'].dt.day_name()

0    Wednesday
1     Thursday
2       Friday
3       Friday
4     Saturday
Name: order_date, dtype: object

In [30]:
# df ke andar ke data ko group karna hai 'order_date' ke day name ke basis par
# dt.day_name() se date ka din ka naam milta hai jese Monday, Tuesday, etc.
# fir har din ke group ke liye 'price_per_unit' ka sum nikalna hai
df.groupby(df['order_date'].dt.day_name()).sum('price_per_unit')

Unnamed: 0_level_0,sale_id,quantity,price_per_unit
order_date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Friday,207,9,30.0
Saturday,105,1,15.5
Thursday,102,5,15.5
Wednesday,101,3,20.0
