In [1]:
import subprocess
from datetime import datetime
from IPython import get_ipython

# --- CONFIGURATION ---
# Values handed to wakatime-cli with every heartbeat (see log_to_wakatime).
NOTEBOOK_NAME = "Data Transformation.ipynb"  # passed as --entity (entity type "file")
PLUGIN_NAME = "jupyterlab/4.0.0"  # passed as --plugin
LANGUAGE = "Python"  # passed as --language
# ----------------------

def log_to_wakatime():
    """Send one WakaTime heartbeat for this notebook through ``wakatime-cli``.

    Builds the CLI command from NOTEBOOK_NAME, PLUGIN_NAME and LANGUAGE, stamps
    it with the current Unix time, runs it, and prints either a success line or
    the CLI's stdout/stderr. Never raises for a missing or unlaunchable CLI, so
    it is safe to call from an IPython event callback.

    Returns:
        None. The outcome is reported with print().
    """
    import time  # local import keeps this cell self-contained

    # time.time() is the true Unix epoch. The earlier
    # datetime.utcnow().timestamp() produced a *naive* UTC datetime which
    # .timestamp() interpreted as local time, shifting every heartbeat by the
    # machine's UTC offset.
    timestamp = str(time.time())

    command = [
        "wakatime-cli",
        "--entity", NOTEBOOK_NAME,
        "--entity-type", "file",
        "--plugin", PLUGIN_NAME,
        "--language", LANGUAGE,
        "--write",
        "--time", timestamp,
    ]

    try:
        result = subprocess.run(command, capture_output=True, text=True)
    except OSError as exc:
        # Raised when the executable cannot be started (e.g. wakatime-cli is
        # not installed or not on PATH). Report it instead of letting the
        # exception escape from the pre_run_cell hook on every cell run.
        print("❌ WakaTime CLI Error:")
        print("STDERR:", exc)
        return

    if result.returncode != 0:
        print("❌ WakaTime CLI Error:")
        print("STDOUT:", result.stdout)
        print("STDERR:", result.stderr)
    else:
        print("✅ WakaTime heartbeat sent at", timestamp)

def on_cell_run(execution_info):
    """Callback registered on IPython's ``pre_run_cell`` event.

    Sends a WakaTime heartbeat each time it is invoked. ``execution_info`` is
    accepted because the event passes it, but it is not used here.
    """
    log_to_wakatime()

# Clear stale heartbeat handlers before registering, so re-running this cell
# does not stack duplicates (which would send several heartbeats per cell).
# Each re-run defines a *new* on_cell_run function object, so old copies have
# to be matched by name; "<lambda>" is kept to clean up older lambda handlers.
ip = get_ipython()
if ip is not None:  # get_ipython() returns None outside an IPython session
    stale_names = {"<lambda>", on_cell_run.__name__}
    for cb in list(ip.events.callbacks['pre_run_cell']):
        # getattr: not every callable (e.g. functools.partial) has __name__
        if getattr(cb, "__name__", None) in stale_names:
            ip.events.unregister('pre_run_cell', cb)

    ip.events.register('pre_run_cell', on_cell_run)

# Data Transformation

Once your data is clean, the next step is to **reshape, reformat, and reorder** it as needed for analysis. Pandas gives you plenty of flexible tools to do this.

---

## Sorting & Ranking

### Sort by Values

```python
df.sort_values("Age")                   # Ascending sort
df.sort_values("Age", ascending=False)  # Descending
df.sort_values(["Age", "Salary"])       # Sort by multiple columns
```
`df.sort_values(["Age", "Salary"])` sorts the DataFrame first by the "Age" column; rows that tie on "Age" (i.e., two or more rows with the same "Age") are then ordered by the "Salary" column.

### Reset Index
If you want the index to start from 0 and be sequential, you can reset it using `reset_index()`:
```python
df.reset_index(drop=True, inplace=True)  # Reset the index and drop the old index
```
### Sort by Index

```python
df.sort_index()
```
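The `df.sort_index()` method sorts the DataFrame by its index values. If the index is no longer in order (e.g., you have sorted by a column or performed other operations that rearrange the rows), you can use `sort_index()` to restore index order. Note that dropping rows leaves gaps in the index rather than unsorting it; use `reset_index()` to renumber. `sort_index()` returns a new DataFrame, so assign the result if you want to keep it.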
### Ranking
The `.rank()` method in pandas assigns ranks to numeric values in a column, like scores or points. By default the smallest value gets rank 1 (pass `ascending=False` to rank the highest value first), and tied values receive the average of the ranks they span, which can result in decimal numbers. For example, if two people tie for first place, they both get a rank of 1.5 (the average of ranks 1 and 2). You can customize the ranking behavior using the `method` parameter. One useful option is `method='dense'`, which assigns the same rank to ties but doesn’t leave gaps in the ranking sequence. This is helpful when you want a clean, consecutive ranking system without skips.
```python
df["Rank"] = df["Score"].rank()                 # Default: average method
df["Rank"] = df["Score"].rank(method="dense")   # 1, 2, 2, 3
```

---

## Renaming Columns & Index

```python
df.rename(columns={"oldName": "newName"}, inplace=True)
df.rename(index={0: "row1", 1: "row2"}, inplace=True)
```

To rename all columns:

```python
df.columns = ["Name", "Age", "City"]
```

---

## Changing Column Order

Just pass a new list of column names:

```python
df = df[["City", "Name", "Age"]]   # Reorder as desired
```

You can also move one column to the front:

```python
cols = ["Name"] + [col for col in df.columns if col != "Name"]
df = df[cols]
```

---



## Summary

- Sort, rank, and rename to prepare your data    
- Reordering and reshaping are key for EDA and visualization

 

In [2]:
import pandas as pd

✅ WakaTime heartbeat sent at 1752626088.773732


In [3]:
# Load the dataset; the relative path means the CSV must sit in the kernel's working directory.
df = pd.read_csv("bollyData.csv")

✅ WakaTime heartbeat sent at 1752626089.150644


In [4]:
# Show the freshly loaded frame as the cell output to inspect its columns.
df

✅ WakaTime heartbeat sent at 1752626089.247332


Unnamed: 0,Title,Year,Genre,Director,Actors,Language,Runtime (min),Budget (INR Cr),BoxOffice (INR Cr),IMDb,Production House,Awards Won,Is_Hit
0,Dilwale Dulhania Le Jayenge,1999,Biopic|Romance,Anurag Kashyap,Ranveer Singh|Ayushmann Khurrana|Aamir Khan,Hindi,178,270.26,738.92,9.4,Viacom18 Studios,0,No
1,3 Idiots,2024,Drama|Historical,Rajkumar Hirani,Priyanka Chopra|Salman Khan|Ranbir Kapoor,Telugu,103,157.87,676.66,7.8,Red Chillies Entertainment,17,Average
2,Dangal,2018,Drama|Comedy,Rohit Shetty,Aamir Khan|Deepika Padukone|Ranbir Kapoor,Bengali,131,22.96,59.67,7.3,T-Series,6,Average
3,Sholay,2011,Comedy|Action,Anurag Kashyap,Deepika Padukone|Alia Bhatt|Aamir Khan,Hindi,152,197.13,1127.47,5.4,Yash Raj Films,17,No
4,Lagaan,2018,Action|Musical,Sanjay Leela Bhansali,Salman Khan|Ranbir Kapoor|Shah Rukh Khan,Punjabi,172,209.45,559.89,7.4,Excel Entertainment,6,No
5,PK,2014,Historical|Thriller,Zoya Akhtar,Deepika Padukone|Alia Bhatt|Ranveer Singh,Tamil,140,156.02,1181.74,8.2,Red Chillies Entertainment,11,Yes
6,Bajrangi Bhaijaan,1996,Comedy|Romance,Karan Johar,Deepika Padukone|Ranbir Kapoor|Ayushmann Khurrana,Tamil,126,90.9,735.95,6.9,Balaji Motion Pictures,5,Average
7,Kabir Singh,1999,Biopic|Drama,Rohit Shetty,Alia Bhatt|Aamir Khan|Ayushmann Khurrana,Bengali,133,202.55,1008.79,3.2,Viacom18 Studios,13,Yes
8,Gully Boy,2000,Drama|Thriller,Anurag Kashyap,Shah Rukh Khan|Deepika Padukone|Kareena Kapoor,Punjabi,95,86.53,1123.16,8.0,Balaji Motion Pictures,20,Average
9,Barfi!,2007,Drama|Thriller,Rohit Shetty,Ayushmann Khurrana|Deepika Padukone|Ranbir Kapoor,Hindi,158,176.52,742.85,8.1,T-Series,10,No


In [5]:
# Ascending sort on Year; the sorted frame is only displayed, df is not reassigned.
df.sort_values(by="Year")

✅ WakaTime heartbeat sent at 1752626089.352561


Unnamed: 0,Title,Year,Genre,Director,Actors,Language,Runtime (min),Budget (INR Cr),BoxOffice (INR Cr),IMDb,Production House,Awards Won,Is_Hit
21,Tanu Weds Manu,1990,Biopic|Thriller,Rohit Shetty,Kareena Kapoor|Priyanka Chopra|Ranbir Kapoor,Telugu,144,284.9,756.52,7.8,Dharma Productions,13,Yes
42,Padmaavat,1994,Thriller|Drama,Rajkumar Hirani,Ranbir Kapoor|Priyanka Chopra|Shah Rukh Khan,Telugu,156,156.59,623.39,3.2,Balaji Motion Pictures,13,Yes
17,Kahaani,1995,Romance|Historical,Rohit Shetty,Deepika Padukone|Shah Rukh Khan|Kareena Kapoor,Punjabi,90,180.26,390.74,8.0,Red Chillies Entertainment,9,Yes
34,Talvar,1995,Biopic|Comedy,Meghna Gulzar,Kareena Kapoor|Priyanka Chopra|Deepika Padukone,Punjabi,143,62.29,412.23,8.9,Yash Raj Films,15,Yes
23,Bhaag Milkha Bhaag,1996,Biopic|Romance,Sanjay Leela Bhansali,Deepika Padukone|Kareena Kapoor|Aamir Khan,Tamil,163,141.59,266.6,7.2,Dharma Productions,18,Average
6,Bajrangi Bhaijaan,1996,Comedy|Romance,Karan Johar,Deepika Padukone|Ranbir Kapoor|Ayushmann Khurrana,Tamil,126,90.9,735.95,6.9,Balaji Motion Pictures,5,Average
14,My Name is Khan,1996,Biopic|Drama,Karan Johar,Priyanka Chopra|Salman Khan|Ranbir Kapoor,Hindi,138,126.52,51.16,4.4,Excel Entertainment,6,Average
22,Piku,1997,Historical|Action,Zoya Akhtar,Salman Khan|Aamir Khan|Priyanka Chopra,Hindi,151,35.19,116.49,8.2,Balaji Motion Pictures,11,No
19,Raazi,1997,Romance|Musical,Rajkumar Hirani,Aamir Khan|Kareena Kapoor|Salman Khan,Telugu,116,107.91,608.12,4.7,Dharma Productions,4,No
16,Queen,1997,Historical|Historical,Karan Johar,Salman Khan|Kareena Kapoor|Shah Rukh Khan,Punjabi,139,40.71,1089.34,9.0,Viacom18 Studios,18,No


In [6]:
# Sort on Year with the newest films first; displayed only, df is not reassigned.
df.sort_values(by="Year", ascending=False)

✅ WakaTime heartbeat sent at 1752626089.456439


Unnamed: 0,Title,Year,Genre,Director,Actors,Language,Runtime (min),Budget (INR Cr),BoxOffice (INR Cr),IMDb,Production House,Awards Won,Is_Hit
47,Don 2,2025,Romance|Romance,Imtiaz Ali,Ranveer Singh|Alia Bhatt|Priyanka Chopra,Punjabi,112,248.24,81.65,7.6,Red Chillies Entertainment,4,Average
1,3 Idiots,2024,Drama|Historical,Rajkumar Hirani,Priyanka Chopra|Salman Khan|Ranbir Kapoor,Telugu,103,157.87,676.66,7.8,Red Chillies Entertainment,17,Average
36,Rang De Basanti,2023,Romance|Drama,Rajkumar Hirani,Aamir Khan|Ayushmann Khurrana|Salman Khan,Telugu,140,180.9,494.9,6.0,Red Chillies Entertainment,14,No
10,Tamasha,2022,Action|Musical,Meghna Gulzar,Ranbir Kapoor|Priyanka Chopra|Kareena Kapoor,Hindi,104,27.26,123.15,5.1,T-Series,12,Average
25,Uri: The Surgical Strike,2021,Musical|Musical,Meghna Gulzar,Ranveer Singh|Ayushmann Khurrana|Aamir Khan,Punjabi,152,120.99,40.05,6.9,Excel Entertainment,16,Average
2,Dangal,2018,Drama|Comedy,Rohit Shetty,Aamir Khan|Deepika Padukone|Ranbir Kapoor,Bengali,131,22.96,59.67,7.3,T-Series,6,Average
4,Lagaan,2018,Action|Musical,Sanjay Leela Bhansali,Salman Khan|Ranbir Kapoor|Shah Rukh Khan,Punjabi,172,209.45,559.89,7.4,Excel Entertainment,6,No
20,Andhadhun,2018,Musical|Action,Rohit Shetty,Ranbir Kapoor|Priyanka Chopra|Aamir Khan,Telugu,101,251.8,805.31,6.3,Viacom18 Studios,5,Yes
18,Drishyam,2018,Romance|Musical,Rajkumar Hirani,Ranbir Kapoor|Alia Bhatt|Ranveer Singh,Bengali,95,53.55,100.76,7.2,Balaji Motion Pictures,5,Average
12,Chak De! India,2016,Thriller|Comedy,Zoya Akhtar,Salman Khan|Ranveer Singh|Deepika Padukone,Telugu,128,81.17,1040.2,9.1,Yash Raj Films,6,Average


In [7]:
# Order by Year, breaking ties within a year by IMDb; keep the result as df2.
df2 = df.sort_values(by=["Year", "IMDb"]).copy()

✅ WakaTime heartbeat sent at 1752626089.559869


In [8]:
# NOTE(review): left disabled, presumably so the next cells can show the
# out-of-order index and then df2.sort_index(); enabling it would renumber the
# rows from 0 — confirm intent.
#df2.reset_index(drop=True, inplace=True)

✅ WakaTime heartbeat sent at 1752626089.674604


In [9]:
# Show df2 as the cell output; the index labels are carried over from df.
df2

✅ WakaTime heartbeat sent at 1752626089.767618


Unnamed: 0,Title,Year,Genre,Director,Actors,Language,Runtime (min),Budget (INR Cr),BoxOffice (INR Cr),IMDb,Production House,Awards Won,Is_Hit
21,Tanu Weds Manu,1990,Biopic|Thriller,Rohit Shetty,Kareena Kapoor|Priyanka Chopra|Ranbir Kapoor,Telugu,144,284.9,756.52,7.8,Dharma Productions,13,Yes
42,Padmaavat,1994,Thriller|Drama,Rajkumar Hirani,Ranbir Kapoor|Priyanka Chopra|Shah Rukh Khan,Telugu,156,156.59,623.39,3.2,Balaji Motion Pictures,13,Yes
17,Kahaani,1995,Romance|Historical,Rohit Shetty,Deepika Padukone|Shah Rukh Khan|Kareena Kapoor,Punjabi,90,180.26,390.74,8.0,Red Chillies Entertainment,9,Yes
34,Talvar,1995,Biopic|Comedy,Meghna Gulzar,Kareena Kapoor|Priyanka Chopra|Deepika Padukone,Punjabi,143,62.29,412.23,8.9,Yash Raj Films,15,Yes
14,My Name is Khan,1996,Biopic|Drama,Karan Johar,Priyanka Chopra|Salman Khan|Ranbir Kapoor,Hindi,138,126.52,51.16,4.4,Excel Entertainment,6,Average
6,Bajrangi Bhaijaan,1996,Comedy|Romance,Karan Johar,Deepika Padukone|Ranbir Kapoor|Ayushmann Khurrana,Tamil,126,90.9,735.95,6.9,Balaji Motion Pictures,5,Average
23,Bhaag Milkha Bhaag,1996,Biopic|Romance,Sanjay Leela Bhansali,Deepika Padukone|Kareena Kapoor|Aamir Khan,Tamil,163,141.59,266.6,7.2,Dharma Productions,18,Average
19,Raazi,1997,Romance|Musical,Rajkumar Hirani,Aamir Khan|Kareena Kapoor|Salman Khan,Telugu,116,107.91,608.12,4.7,Dharma Productions,4,No
31,Masaan,1997,Comedy|Thriller,Karan Johar,Ranveer Singh|Ranbir Kapoor|Deepika Padukone,Hindi,114,115.72,206.5,5.0,Yash Raj Films,17,No
22,Piku,1997,Historical|Action,Zoya Akhtar,Salman Khan|Aamir Khan|Priyanka Chopra,Hindi,151,35.19,116.49,8.2,Balaji Motion Pictures,11,No


In [10]:
# Display df2 ordered by index label; the result is not assigned, so df2 keeps its order.
df2.sort_index()

✅ WakaTime heartbeat sent at 1752626089.872464


Unnamed: 0,Title,Year,Genre,Director,Actors,Language,Runtime (min),Budget (INR Cr),BoxOffice (INR Cr),IMDb,Production House,Awards Won,Is_Hit
0,Dilwale Dulhania Le Jayenge,1999,Biopic|Romance,Anurag Kashyap,Ranveer Singh|Ayushmann Khurrana|Aamir Khan,Hindi,178,270.26,738.92,9.4,Viacom18 Studios,0,No
1,3 Idiots,2024,Drama|Historical,Rajkumar Hirani,Priyanka Chopra|Salman Khan|Ranbir Kapoor,Telugu,103,157.87,676.66,7.8,Red Chillies Entertainment,17,Average
2,Dangal,2018,Drama|Comedy,Rohit Shetty,Aamir Khan|Deepika Padukone|Ranbir Kapoor,Bengali,131,22.96,59.67,7.3,T-Series,6,Average
3,Sholay,2011,Comedy|Action,Anurag Kashyap,Deepika Padukone|Alia Bhatt|Aamir Khan,Hindi,152,197.13,1127.47,5.4,Yash Raj Films,17,No
4,Lagaan,2018,Action|Musical,Sanjay Leela Bhansali,Salman Khan|Ranbir Kapoor|Shah Rukh Khan,Punjabi,172,209.45,559.89,7.4,Excel Entertainment,6,No
5,PK,2014,Historical|Thriller,Zoya Akhtar,Deepika Padukone|Alia Bhatt|Ranveer Singh,Tamil,140,156.02,1181.74,8.2,Red Chillies Entertainment,11,Yes
6,Bajrangi Bhaijaan,1996,Comedy|Romance,Karan Johar,Deepika Padukone|Ranbir Kapoor|Ayushmann Khurrana,Tamil,126,90.9,735.95,6.9,Balaji Motion Pictures,5,Average
7,Kabir Singh,1999,Biopic|Drama,Rohit Shetty,Alia Bhatt|Aamir Khan|Ayushmann Khurrana,Bengali,133,202.55,1008.79,3.2,Viacom18 Studios,13,Yes
8,Gully Boy,2000,Drama|Thriller,Anurag Kashyap,Shah Rukh Khan|Deepika Padukone|Kareena Kapoor,Punjabi,95,86.53,1123.16,8.0,Balaji Motion Pictures,20,Average
9,Barfi!,2007,Drama|Thriller,Rohit Shetty,Ayushmann Khurrana|Deepika Padukone|Ranbir Kapoor,Hindi,158,176.52,742.85,8.1,T-Series,10,No


In [11]:
# Dense rank on IMDb with the highest rating as rank 1; ties share a rank and no ranks are skipped.
df2["Rank"] = df2["IMDb"].rank(method="dense", ascending=False)

✅ WakaTime heartbeat sent at 1752626089.982231


In [12]:
# Show df2 again, now including the Rank column.
df2

✅ WakaTime heartbeat sent at 1752626090.081513


Unnamed: 0,Title,Year,Genre,Director,Actors,Language,Runtime (min),Budget (INR Cr),BoxOffice (INR Cr),IMDb,Production House,Awards Won,Is_Hit,Rank
21,Tanu Weds Manu,1990,Biopic|Thriller,Rohit Shetty,Kareena Kapoor|Priyanka Chopra|Ranbir Kapoor,Telugu,144,284.9,756.52,7.8,Dharma Productions,13,Yes,11.0
42,Padmaavat,1994,Thriller|Drama,Rajkumar Hirani,Ranbir Kapoor|Priyanka Chopra|Shah Rukh Khan,Telugu,156,156.59,623.39,3.2,Balaji Motion Pictures,13,Yes,30.0
17,Kahaani,1995,Romance|Historical,Rohit Shetty,Deepika Padukone|Shah Rukh Khan|Kareena Kapoor,Punjabi,90,180.26,390.74,8.0,Red Chillies Entertainment,9,Yes,9.0
34,Talvar,1995,Biopic|Comedy,Meghna Gulzar,Kareena Kapoor|Priyanka Chopra|Deepika Padukone,Punjabi,143,62.29,412.23,8.9,Yash Raj Films,15,Yes,4.0
14,My Name is Khan,1996,Biopic|Drama,Karan Johar,Priyanka Chopra|Salman Khan|Ranbir Kapoor,Hindi,138,126.52,51.16,4.4,Excel Entertainment,6,Average,28.0
6,Bajrangi Bhaijaan,1996,Comedy|Romance,Karan Johar,Deepika Padukone|Ranbir Kapoor|Ayushmann Khurrana,Tamil,126,90.9,735.95,6.9,Balaji Motion Pictures,5,Average,17.0
23,Bhaag Milkha Bhaag,1996,Biopic|Romance,Sanjay Leela Bhansali,Deepika Padukone|Kareena Kapoor|Aamir Khan,Tamil,163,141.59,266.6,7.2,Dharma Productions,18,Average,15.0
19,Raazi,1997,Romance|Musical,Rajkumar Hirani,Aamir Khan|Kareena Kapoor|Salman Khan,Telugu,116,107.91,608.12,4.7,Dharma Productions,4,No,27.0
31,Masaan,1997,Comedy|Thriller,Karan Johar,Ranveer Singh|Ranbir Kapoor|Deepika Padukone,Hindi,114,115.72,206.5,5.0,Yash Raj Films,17,No,25.0
22,Piku,1997,Historical|Action,Zoya Akhtar,Salman Khan|Aamir Khan|Priyanka Chopra,Hindi,151,35.19,116.49,8.2,Balaji Motion Pictures,11,No,7.0


In [13]:
# Keep only these six columns, in this order (df is rebound to the narrower frame).
df = df[
    ["Title", "Actors", "Year", "Genre", "IMDb", "Awards Won"]
]

✅ WakaTime heartbeat sent at 1752626090.193876


In [14]:
# Show df after the column selection to confirm the new column order.
df

✅ WakaTime heartbeat sent at 1752626090.287952


Unnamed: 0,Title,Actors,Year,Genre,IMDb,Awards Won
0,Dilwale Dulhania Le Jayenge,Ranveer Singh|Ayushmann Khurrana|Aamir Khan,1999,Biopic|Romance,9.4,0
1,3 Idiots,Priyanka Chopra|Salman Khan|Ranbir Kapoor,2024,Drama|Historical,7.8,17
2,Dangal,Aamir Khan|Deepika Padukone|Ranbir Kapoor,2018,Drama|Comedy,7.3,6
3,Sholay,Deepika Padukone|Alia Bhatt|Aamir Khan,2011,Comedy|Action,5.4,17
4,Lagaan,Salman Khan|Ranbir Kapoor|Shah Rukh Khan,2018,Action|Musical,7.4,6
5,PK,Deepika Padukone|Alia Bhatt|Ranveer Singh,2014,Historical|Thriller,8.2,11
6,Bajrangi Bhaijaan,Deepika Padukone|Ranbir Kapoor|Ayushmann Khurrana,1996,Comedy|Romance,6.9,5
7,Kabir Singh,Alia Bhatt|Aamir Khan|Ayushmann Khurrana,1999,Biopic|Drama,3.2,13
8,Gully Boy,Shah Rukh Khan|Deepika Padukone|Kareena Kapoor,2000,Drama|Thriller,8.0,20
9,Barfi!,Ayushmann Khurrana|Deepika Padukone|Ranbir Kapoor,2007,Drama|Thriller,8.1,10
