In [5]:
#  Import required libraries
import pandas as pd
from datetime import datetime
import os

# File paths in Google Colab (both live under /content).
# csv_file: the CSV read by full_extraction() and incremental_extraction().
# last_extraction_file: text file holding the last extraction timestamp as
# 'YYYY-MM-DD HH:MM:SS'; read by incremental_extraction() and rewritten by
# save_new_extraction_time().
csv_file = '/content/data.csv'
last_extraction_file = '/content/last_extraction.txt'

#  Function: Full Extraction
def full_extraction():
    """Load the whole CSV at ``csv_file`` and return it as a DataFrame.

    Prints the number of rows and the frame's shape, then shows the first
    rows through ``display``.

    NOTE: ``display`` is not imported in this file; it is expected to be
    available as a global, as it is inside a notebook session.

    Returns:
        The DataFrame produced by ``pd.read_csv(csv_file)``, unfiltered.
    """
    frame = pd.read_csv(csv_file)
    row_count = frame.shape[0]

    print(f"Extracted {row_count} rows fully.")
    print(f"\nDataset shape: {frame.shape}")

    preview = frame.head()
    display(preview)
    return frame

#  Function: Incremental Extraction
def _read_last_extraction_time():
    """Read the stored extraction timestamp, seeding it when absent.

    A missing *or empty* ``last_extraction_file`` is (re)written with the
    default '2000-01-01 00:00:00' so that the first run extracts everything
    newer than that date.

    Returns:
        A ``(text, parsed)`` tuple: the timestamp exactly as stored (stripped)
        and the same value parsed into a naive ``datetime``.

    Raises:
        ValueError: the file holds text that is not in
            'YYYY-MM-DD HH:MM:SS' format.
    """
    default_stamp = '2000-01-01 00:00:00'
    stamp_format = '%Y-%m-%d %H:%M:%S'

    # EAFP: open directly instead of exists()-then-open, which can race with
    # another process creating or removing the file in between.
    try:
        with open(last_extraction_file, 'r') as f:
            last_time_str = f.read().strip()
    except FileNotFoundError:
        last_time_str = ''

    if not last_time_str:
        # An empty file (e.g. left by an interrupted write) is treated like a
        # missing one rather than crashing in strptime below.
        with open(last_extraction_file, 'w') as f:
            f.write(default_stamp)
        last_time_str = default_stamp

    try:
        last_time = datetime.strptime(last_time_str, stamp_format)
    except ValueError as err:
        # Same exception type as before, but say which file and which format.
        raise ValueError(
            f"Cannot parse extraction timestamp {last_time_str!r} from "
            f"{last_extraction_file}; expected 'YYYY-MM-DD HH:MM:SS'."
        ) from err

    return last_time_str, last_time


def incremental_extraction():
    """Return the rows of the CSV whose 'timestamp' is after the last run.

    The cut-off is read from ``last_extraction_file`` (created with a default
    of 2000-01-01 when missing or empty). The CSV at ``csv_file`` is loaded in
    full, its 'timestamp' column is converted with ``pd.to_datetime`` and rows
    strictly greater than the cut-off are kept.

    This function does not update the stored cut-off; that is done separately
    by ``save_new_extraction_time``.

    NOTE(review): the cut-off is a naive ``datetime``; if the CSV timestamps
    carry timezone offsets the comparison may fail — confirm the data is naive.

    Returns:
        A DataFrame with the newer rows, or an empty ``pd.DataFrame()`` when
        the CSV has no 'timestamp' column.

    Raises:
        ValueError: the stored cut-off is not in 'YYYY-MM-DD HH:MM:SS' format.
    """
    last_time_str, last_time = _read_last_extraction_time()

    # Load real data
    df = pd.read_csv(csv_file)

    # Confirm which timestamp column exists in your CSV
    print(f"\nAvailable columns: {list(df.columns)}")

    # Replace 'timestamp' with your actual timestamp column name
    if 'timestamp' not in df.columns:
        print("No 'timestamp' column found in your CSV. Incremental extraction requires a timestamp.")
        return pd.DataFrame()

    # Convert to datetime and keep only rows strictly newer than the cut-off
    df['timestamp'] = pd.to_datetime(df['timestamp'])
    new_data = df[df['timestamp'] > last_time]

    print(f"Extracted {len(new_data)} rows incrementally since {last_time_str}.")
    display(new_data.head())
    return new_data

#  Function: Save New Extraction Timestamp
def save_new_extraction_time():
    """Overwrite ``last_extraction_file`` with the current local time.

    The time comes from ``datetime.now()`` and is stored as
    'YYYY-MM-DD HH:MM:SS', the same layout ``incremental_extraction`` parses.
    The saved value is also printed.
    """
    stamp = f"{datetime.now():%Y-%m-%d %H:%M:%S}"

    with open(last_extraction_file, 'w') as handle:
        handle.write(stamp)

    print(f" New extraction timestamp saved: {stamp}")

# Driver: runs at cell/module level, in this order, every time.

# 1) Full extraction: load and preview the whole CSV.
print("FULL EXTRACTION")
full_data = full_extraction()

# 2) Incremental extraction: only rows newer than the stored timestamp.
print("\n INCREMENTAL EXTRACTION")
incremental_data = incremental_extraction()

# 3) Record "now" as the new cut-off for the next incremental run.
#    NOTE: this is called unconditionally, including when the incremental
#    step returned an empty frame because no 'timestamp' column was found.
save_new_extraction_time()

# 4) Colab-specific: trigger a browser download of the timestamp file.
#    The import is placed here, so google.colab is only needed at this step.
from google.colab import files
files.download('/content/last_extraction.txt')


FULL EXTRACTION
Extracted 10 rows fully.

Dataset shape: (10, 3)


Unnamed: 0,id,value,timestamp
0,0,0,2024-01-01
1,1,10,2024-01-02
2,2,20,2024-01-03
3,3,30,2024-01-04
4,4,40,2024-01-05



 INCREMENTAL EXTRACTION

Available columns: ['id', 'value', 'timestamp']
Extracted 0 rows incrementally since 2025-06-10 18:42:30.


Unnamed: 0,id,value,timestamp


 New extraction timestamp saved: 2025-06-10 19:02:31


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>