# Section 1: Full Extraction

In [None]:
!pip install pandas

In [None]:
import pandas as pd
from datetime import datetime

# Full extraction: read every row of the CSV, parsing 'order_date' as datetimes
df_full = pd.read_csv('custom_data.csv', parse_dates=['order_date'])

# Report the frame's dimensions, then preview the first rows
row_count, column_count = df_full.shape
print(f"Total Rows Extracted: {row_count}")
print(f"Total Columns: {column_count}")
print("\nSample of the data:")
display(df_full.head())

Total Rows Extracted: 100
Total Columns: 6

Sample of the data:


Unnamed: 0,order_id,customer_name,product,order_date,quantity,total_price
0,1,Customer 1,Tablet,2025-06-26 11:18:00,1,583.28
1,2,Customer 2,Camera,2025-06-13 11:27:00,4,2299.2
2,3,Customer 3,Headphones,2025-06-23 22:30:00,3,3682.14
3,4,Customer 4,Headphones,2025-06-10 14:30:00,1,687.12
4,5,Customer 5,Smartphone,2025-06-26 02:40:00,1,990.39


# Section 2: Partial (Incremental) Extraction

In [11]:
# Incremental extraction: keep only the rows whose 'order_date' is newer than
# the timestamp stored in last_extraction.txt.

# Step 1: Read the last extraction timestamp from file.
# A missing file (e.g. the very first run) is treated as "no previous extraction"
# instead of aborting the cell with FileNotFoundError.
try:
    with open('last_extraction.txt', 'r') as file:
        last_extraction_time_str = file.read().strip()  # Drop surrounding whitespace/newlines
except FileNotFoundError:
    last_extraction_time_str = ''

# Convert the string to a pandas Timestamp for comparison. An empty string is
# mapped to NaT explicitly; a non-empty but malformed value still raises so that
# a corrupted timestamp file is noticed rather than ignored.
last_extraction_time = (
    pd.to_datetime(last_extraction_time_str) if last_extraction_time_str else pd.NaT
)
no_previous_extraction = pd.isna(last_extraction_time)

if no_previous_extraction:
    print("Last Extraction Time: none recorded (extracting all rows)")
else:
    print(f"Last Extraction Time: {last_extraction_time}")

# Step 2: Load the dataset again, ensuring 'order_date' is parsed as datetime
df = pd.read_csv('custom_data.csv', parse_dates=['order_date'])

# Step 3: Keep rows where 'order_date' is strictly greater than the last extraction time.
# Comparing against NaT is False for every row, which would silently report zero
# new records, so with no usable timestamp every row is taken instead.
if no_previous_extraction:
    df_incremental = df.copy()
else:
    df_incremental = df[df['order_date'] > last_extraction_time]

# Print how many new rows were extracted
print(f"New Records Extracted: {df_incremental.shape[0]}")

# Display the new rows for verification
display(df_incremental)

Last Extraction Time: 2025-06-10 13:52:29
New Records Extracted: 70


Unnamed: 0,order_id,customer_name,product,order_date,quantity,total_price
0,1,Customer 1,Tablet,2025-06-26 11:18:00,1,583.28
1,2,Customer 2,Camera,2025-06-13 11:27:00,4,2299.20
2,3,Customer 3,Headphones,2025-06-23 22:30:00,3,3682.14
3,4,Customer 4,Headphones,2025-06-10 14:30:00,1,687.12
4,5,Customer 5,Smartphone,2025-06-26 02:40:00,1,990.39
...,...,...,...,...,...,...
91,92,Customer 92,Camera,2025-06-12 10:11:00,2,1690.89
92,93,Customer 93,Camera,2025-06-15 16:15:00,2,1253.02
94,95,Customer 95,Headphones,2025-07-04 13:39:00,3,264.65
97,98,Customer 98,Camera,2025-06-17 18:44:00,3,4367.03


# Section 3: Saving the New Timestamp

In [10]:
# Capture the current wall-clock time, truncated to whole seconds so that the
# value reported below is exactly the value persisted to the file (the stored
# format has no sub-second component).
# NOTE(review): this records "now", not the moment the data was read — run this
# cell only after the extraction it is meant to checkpoint has completed.
current_time = datetime.now().replace(microsecond=0)

# Overwrite last_extraction.txt with the new timestamp as 'YYYY-MM-DD HH:MM:SS'
with open('last_extraction.txt', 'w') as file:
    file.write(current_time.strftime('%Y-%m-%d %H:%M:%S'))

print(f"Extraction timestamp updated to: {current_time}")

Extraction timestamp updated to: 2025-06-10 13:52:29.132162
