In [1]:
import subprocess
from datetime import datetime
from IPython import get_ipython

# --- CONFIGURATION ---
NOTEBOOK_NAME = "DataFrame Essentials.ipynb"
PLUGIN_NAME = "jupyterlab/4.0.0"
LANGUAGE = "Python"
# ----------------------

def log_to_wakatime():
    """Send one WakaTime heartbeat for this notebook through ``wakatime-cli``.

    The heartbeat is stamped with the current Unix time and attributed to
    ``NOTEBOOK_NAME``, ``PLUGIN_NAME`` and ``LANGUAGE`` from the configuration
    block. The outcome is reported with ``print``; the function returns
    ``None`` and does not raise when the CLI is missing or exits non-zero.
    """
    # Function-scope import so this block does not depend on the import cell changing.
    from datetime import timezone

    # datetime.utcnow() returns a *naive* datetime and .timestamp() interprets
    # naive values as local time, so the previous code produced an epoch that
    # was off by the machine's UTC offset. An aware UTC datetime yields the
    # correct Unix time regardless of the local time zone.
    timestamp = str(datetime.now(timezone.utc).timestamp())

    command = [
        "wakatime-cli",
        "--entity", NOTEBOOK_NAME,
        "--entity-type", "file",
        "--plugin", PLUGIN_NAME,
        "--language", LANGUAGE,
        "--write",
        "--time", timestamp,
    ]

    try:
        result = subprocess.run(command, capture_output=True, text=True)
    except OSError as exc:
        # Raised when the executable cannot be started at all, e.g.
        # wakatime-cli is not installed or not on PATH. Report it in the same
        # format as a CLI failure instead of surfacing a traceback per cell.
        print("❌ WakaTime CLI Error:")
        print("STDERR:", exc)
        return

    if result.returncode != 0:
        print("❌ WakaTime CLI Error:")
        print("STDOUT:", result.stdout)
        print("STDERR:", result.stderr)
    else:
        print("✅ WakaTime heartbeat sent at", timestamp)

def on_cell_run(execution_info):
    """Event callback that sends one WakaTime heartbeat per call.

    It is registered for IPython's ``pre_run_cell`` event further down in this
    cell. ``execution_info`` is accepted to match the callback signature and
    is not used.
    """
    log_to_wakatime()

# Remove handlers left behind by earlier runs of this cell before registering
# a fresh one. Every re-run defines a *new* on_cell_run function object, so the
# old ones must be matched by name; otherwise they accumulate and each cell
# execution sends several heartbeats. Callbacks named "<lambda>" are still
# removed as before (they were the original, broken handlers).
# NOTE: matching is by name only, so an unrelated callback that happens to use
# one of these names would be removed as well.
ip = get_ipython()
if ip is not None:  # get_ipython() returns None when not running under IPython
    stale_names = {"<lambda>", on_cell_run.__name__}
    for cb in list(ip.events.callbacks['pre_run_cell']):
        # Not every callable has __name__ (e.g. functools.partial objects).
        if getattr(cb, "__name__", None) in stale_names:
            ip.events.unregister('pre_run_cell', cb)

    ip.events.register('pre_run_cell', on_cell_run)

In [2]:
import pandas as pd

✅ WakaTime heartbeat sent at 1752331552.201035


# 🔹 What is Pandas?

**Pandas** is a Python library for **data manipulation and analysis**.
It provides two main data structures:

* `Series`: 1D labeled array (like a column)
* `DataFrame`: 2D labeled table (like an Excel sheet)


# Creating Series 

## If no custom index is provided, `pandas` indexes the Series from 0 to n-1 (`n`: number of elements)

In [3]:
# A Series built from a plain list receives the default integer index 0..n-1.
s1 = pd.Series([10, 20, 30, 40])

print(s1)
print(type(s1))
print(s1.index[s1 == 30])  # index label(s) of the element(s) equal to 30
print(s1[1])               # value stored under index label 1

✅ WakaTime heartbeat sent at 1752331552.539573
0    10
1    20
2    30
3    40
dtype: int64
<class 'pandas.core.series.Series'>
Index([2], dtype='int64')
20


In [4]:
# Custom indexing: pass explicit labels instead of relying on 0..n-1.
s2 = pd.Series([10, 20, 30], index=list("abc"))

print(s2)
print(type(s2))
print(s2.index[s2 == 30])  # label(s) of the element(s) equal to 30
print(s2["a"])             # look a value up by its label

✅ WakaTime heartbeat sent at 1752331552.598801
a    10
b    20
c    30
dtype: int64
<class 'pandas.core.series.Series'>
Index(['c'], dtype='object')
10


# Creating DataFrames

In [5]:
# List of rows: each inner list is one record, `columns` names the fields in order.
data = [["Alice", 25], ["Bob", 30], ["Charlie", 35]]

df = pd.DataFrame(data=data, columns=["Name", "Age"])
print(df)

✅ WakaTime heartbeat sent at 1752331552.654987
      Name  Age
0    Alice   25
1      Bob   30
2  Charlie   35


In [6]:
# Rows may be shorter than the column list: the missing trailing values are
# filled with NaN, which is why Marks is shown as a float column (87.0, NaN).
data = [["Alice", 25, 87], ["Bob", 30], ["Charlie", 35]]

df = pd.DataFrame(data=data, columns=["Name", "Age", "Marks"])
print(df)

✅ WakaTime heartbeat sent at 1752331552.710755
      Name  Age  Marks
0    Alice   25   87.0
1      Bob   30    NaN
2  Charlie   35    NaN


In [7]:
# Dict of equal-length lists: every key becomes a column, every list its values.
data = dict(
    name=["Alice", "Bob", "Charlie"],
    age=[25, 30, 35],
    city=["Delhi", "Mumbai", "Bangalore"],
)

df = pd.DataFrame(data)
print(df)

✅ WakaTime heartbeat sent at 1752331552.765502
      name  age       city
0    Alice   25      Delhi
1      Bob   30     Mumbai
2  Charlie   35  Bangalore


In [8]:
# Way 1: supply the row labels when the DataFrame is constructed.
df = pd.DataFrame(data, index=list("abc"))
print(df)

# Way 2: assign new labels to an existing DataFrame through its `index` attribute.
df.index = list("xyz")
print(df)

✅ WakaTime heartbeat sent at 1752331552.82583
      name  age       city
a    Alice   25      Delhi
b      Bob   30     Mumbai
c  Charlie   35  Bangalore
      name  age       city
x    Alice   25      Delhi
y      Bob   30     Mumbai
z  Charlie   35  Bangalore


In [9]:
import numpy as np

# A 2-D NumPy array maps straight onto rows and columns; only the column
# names need to be provided.
arr = np.array([[1, 2], [3, 4]])

df = pd.DataFrame(data=arr, columns=list("AB"))
print(df)

✅ WakaTime heartbeat sent at 1752331552.881442
   A  B
0  1  2
1  3  4


# Reading External Data
## ***Syntax***
```pd.read_<format>("file_name.extension")``` — for example `pd.read_csv("data.csv")`

In [10]:
# Load data.csv (a relative path) into a DataFrame; no index column is
# specified, so the rows get the default 0..n-1 index.
df = pd.read_csv('data.csv')
print(df)

✅ WakaTime heartbeat sent at 1752331552.936072
    ID     Name  Age Gender  Marks    Subject       City
0  101    Alice   20      F     88       Math      Delhi
1  102      Bob   21      M     76    Physics     Mumbai
2  103  Charlie   22      M     93  Chemistry  Bangalore
3  104    Diana   20      F     85       Math    Chennai
4  105    Ethan   23      M     70    Biology       Pune
5  106    Fiona   21      F     90    Physics      Delhi
6  107   George   22      M     60       Math     Mumbai
7  108   Hannah   23      F     95  Chemistry  Hyderabad
8  109      Ian   20      M     55    Biology      Delhi
9  110     Jane   21      F     82    Physics    Kolkata


---

# To use one of the columns as the row index (the row labels), there are two options:

### Option 1: Set index while reading the CSV

### Option 2: Set index after reading

In [11]:
# Option 1: have read_csv use the ID column as the row index while loading.
df = pd.read_csv("data.csv", index_col="ID")
print(df)

✅ WakaTime heartbeat sent at 1752331552.990937
        Name  Age Gender  Marks    Subject       City
ID                                                   
101    Alice   20      F     88       Math      Delhi
102      Bob   21      M     76    Physics     Mumbai
103  Charlie   22      M     93  Chemistry  Bangalore
104    Diana   20      F     85       Math    Chennai
105    Ethan   23      M     70    Biology       Pune
106    Fiona   21      F     90    Physics      Delhi
107   George   22      M     60       Math     Mumbai
108   Hannah   23      F     95  Chemistry  Hyderabad
109      Ian   20      M     55    Biology      Delhi
110     Jane   21      F     82    Physics    Kolkata


In [12]:
# Option 2: load with the default 0..n-1 index first, then make ID the index.
df = pd.read_csv("data.csv")
df.set_index("ID", inplace=True) # inplace=True modifies df itself instead of returning a new DataFrame.
print(df)

✅ WakaTime heartbeat sent at 1752331553.046404
        Name  Age Gender  Marks    Subject       City
ID                                                   
101    Alice   20      F     88       Math      Delhi
102      Bob   21      M     76    Physics     Mumbai
103  Charlie   22      M     93  Chemistry  Bangalore
104    Diana   20      F     85       Math    Chennai
105    Ethan   23      M     70    Biology       Pune
106    Fiona   21      F     90    Physics      Delhi
107   George   22      M     60       Math     Mumbai
108   Hannah   23      F     95  Chemistry  Hyderabad
109      Ian   20      M     55    Biology      Delhi
110     Jane   21      F     82    Physics    Kolkata


| With `inplace=True`          | Without `inplace=True`                   |
| ---------------------------- | ---------------------------------------- |
| Modifies the original object | Returns a modified copy                  |
| Returns `None`; usually no real memory savings | More explicit, avoids accidental changes |
| Cannot be chained            | Can be used in method chains             |


In [13]:
print("\nHead:",df.head())        # First 5 rows
print("\nTail:",df.tail())        # Last 5 rows
# df.info() writes its report to stdout itself and returns None, so passing it
# to print() showed the report *before* the label and then printed "Info: None".
# Print the label first, then call info() on its own.
print("\nInfo:")
df.info()                         # Column types and non-null counts
print("\nDescribe:",df.describe())    # Summary statistics
print("\nShape:",df.shape)         # (rows, columns)
print("\nColumns:",df.columns)       # Column names

✅ WakaTime heartbeat sent at 1752331553.102349

Head:         Name  Age Gender  Marks    Subject       City
ID                                                   
101    Alice   20      F     88       Math      Delhi
102      Bob   21      M     76    Physics     Mumbai
103  Charlie   22      M     93  Chemistry  Bangalore
104    Diana   20      F     85       Math    Chennai
105    Ethan   23      M     70    Biology       Pune

Tail:        Name  Age Gender  Marks    Subject       City
ID                                                  
106   Fiona   21      F     90    Physics      Delhi
107  George   22      M     60       Math     Mumbai
108  Hannah   23      F     95  Chemistry  Hyderabad
109     Ian   20      M     55    Biology      Delhi
110    Jane   21      F     82    Physics    Kolkata
<class 'pandas.core.frame.DataFrame'>
Index: 10 entries, 101 to 110
Data columns (total 6 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   Name     10

# Accessing Columns & Rows

In [14]:
# Access a single column by name (the result is a Series)
print(df['Name'])

# Access multiple columns by passing a list of names (the result is a DataFrame)
print(df[['Name', 'Age']])

# Access a row by integer position: iloc[0] is the first row, whatever its label is
print(df.iloc[0])   # here the first row is the one labelled 101

# Access a row by index label: works because ID is the index and 102 is one of its labels
print(df.loc[102])  # a label that is not in the index would raise a KeyError

# Conditional filtering: keep only the rows whose Age is greater than 20
print(df[df['Age'] > 20])


✅ WakaTime heartbeat sent at 1752331553.162716
ID
101      Alice
102        Bob
103    Charlie
104      Diana
105      Ethan
106      Fiona
107     George
108     Hannah
109        Ian
110       Jane
Name: Name, dtype: object
        Name  Age
ID               
101    Alice   20
102      Bob   21
103  Charlie   22
104    Diana   20
105    Ethan   23
106    Fiona   21
107   George   22
108   Hannah   23
109      Ian   20
110     Jane   21
Name       Alice
Age           20
Gender         F
Marks         88
Subject     Math
City       Delhi
Name: 101, dtype: object
Name           Bob
Age             21
Gender           M
Marks           76
Subject    Physics
City        Mumbai
Name: 102, dtype: object
        Name  Age Gender  Marks    Subject       City
ID                                                   
102      Bob   21      M     76    Physics     Mumbai
103  Charlie   22      M     93  Chemistry  Bangalore
105    Ethan   23      M     70    Biology       Pune
106    Fiona   21     

##  Changing Data Types

```python
df['Age'] = df['Age'].astype(float)
```

In [15]:
# Convert Age to float and assign the result back so the column in df is replaced.
df['Age'] = df['Age'].astype(float)
print(df['Age'])

✅ WakaTime heartbeat sent at 1752331553.217584
ID
101    20.0
102    21.0
103    22.0
104    20.0
105    23.0
106    21.0
107    22.0
108    23.0
109    20.0
110    21.0
Name: Age, dtype: float64


## Renaming Columns

```python
df.rename(columns={'Name': 'Full Name'}, inplace=True)
```

In [16]:
# The columns mapping is {old name: new name}; inplace=True applies the rename to df itself.
df.rename(columns={'Name': 'Full Name'}, inplace=True)
print(df)

✅ WakaTime heartbeat sent at 1752331553.273005
    Full Name   Age Gender  Marks    Subject       City
ID                                                     
101     Alice  20.0      F     88       Math      Delhi
102       Bob  21.0      M     76    Physics     Mumbai
103   Charlie  22.0      M     93  Chemistry  Bangalore
104     Diana  20.0      F     85       Math    Chennai
105     Ethan  23.0      M     70    Biology       Pune
106     Fiona  21.0      F     90    Physics      Delhi
107    George  22.0      M     60       Math     Mumbai
108    Hannah  23.0      F     95  Chemistry  Hyderabad
109       Ian  20.0      M     55    Biology      Delhi
110      Jane  21.0      F     82    Physics    Kolkata


## Adding / Dropping Columns

```python
df['Score'] = [85, 90, 78, 88, 72, 91, 65, 97, 58, 84]  # Add new column (one value per row)
df.drop('Score', axis=1, inplace=True)  # Drop column
```

In [17]:
# Add a new column. A list assigned as a column must contain exactly one value
# per row; the earlier two-element list failed because df has 10 rows.
df['Score'] = [85, 90, 78, 88, 72, 91, 65, 97, 58, 84]

# Drop the column again; axis=1 means "columns" (axis=0 would drop rows).
df.drop('Score', axis=1, inplace=True)

✅ WakaTime heartbeat sent at 1752331553.330474
