In [2]:
import pandas as pd

In [3]:
import numpy as np

In [4]:
# Seed NumPy's legacy global RNG so every run draws the same synthetic values
np.random.seed(42)
# Number of telecom records to generate (5 million)
rows = 5_000_000


In [5]:
# The global NumPy RNG is consumed sequentially, so the random columns are
# drawn here in the same order as the dict keys below: usage, duration, region.
region_names = ['Delhi', 'Mumbai', 'Chennai', 'Kolkata', 'Bangalore']

id_column = np.arange(1, rows + 1)                                   # ids 1..rows
usage_column = np.round(np.random.uniform(0.1, 100, size=rows), 2)   # rounded to 2 decimals
duration_column = np.random.randint(1, 500, size=rows)               # integers 1..499
region_column = np.random.choice(region_names, size=rows)            # one region name per record

# Assemble the columns under the names used by the rest of the notebook
data = {
    'customer_id': id_column,
    'data_used_gb': usage_column,
    'call_duration': duration_column,
    'region': region_column,
}


In [6]:
# Turn the column dict into a DataFrame, then write it to disk as CSV.
# index=False keeps the DataFrame's row index out of the file.
df = pd.DataFrame(data=data)
df.to_csv(path_or_buf='big_telecom_data.csv', index=False)


In [7]:
# Report that the CSV export step has finished
print(
    "✅ big_telecom_data.csv created successfully "
    "with 5 million rows."
)

✅ big_telecom_data.csv created successfully with 5 million rows.


In [8]:
import pandas as pd

In [9]:
# Step 1: Open the CSV lazily. With chunksize set, read_csv returns an
# iterator that yields DataFrames of up to 100,000 rows each instead of
# loading the whole file at once.
chunks = pd.read_csv(
    'big_telecom_data.csv',
    chunksize=100_000,
)


In [10]:
# Step 2: Start both running totals at zero before any chunk is processed
total_usage, heavy_users = 0, 0


In [11]:
# Step 3: Stream through the file one chunk at a time, folding each chunk
# into the running totals so only one chunk is held in memory at once
for chunk in chunks:
    usage_col = chunk['data_used_gb']
    total_usage = total_usage + usage_col.sum()          # running sum of usage
    heavy_users = heavy_users + usage_col.gt(50).sum()   # rows strictly above 50


In [12]:
# Step 4: Report both aggregates, one per line
print(
    f"✅ Total data usage: {total_usage:,.2f} GB",
    f"✅ Heavy users (>50GB): {heavy_users:,}",
    sep="\n",
)


✅ Total data usage: 250,243,319.20 GB
✅ Heavy users (>50GB): 2,501,641


In [13]:
import pandas as pd
from tqdm import tqdm   # pip install tqdm


In [14]:
# A chunked reader can only be iterated once, so open a fresh one and
# zero both totals before the second pass over the file
chunks = pd.read_csv(
    'big_telecom_data.csv',
    chunksize=100_000,
)
total_usage, heavy_users = 0, 0


In [15]:
# Accumulate total usage and the >50 count chunk by chunk; tqdm wraps the
# reader so progress is shown as each chunk is consumed
for chunk in tqdm(chunks, desc="Processing..."):
    usage_col = chunk['data_used_gb']
    total_usage = total_usage + usage_col.sum()
    heavy_users = heavy_users + usage_col.gt(50).sum()

# Report both aggregates, one per line (the first line starts with a blank line)
print(
    f"\n✅ Total Data Usage: {total_usage:,.2f} GB",
    f"✅ Heavy Users (>50GB): {heavy_users:,}",
    sep="\n",
)


Processing...: 50it [00:01, 28.61it/s]


✅ Total Data Usage: 250,243,319.20 GB
✅ Heavy Users (>50GB): 2,501,641





In [17]:
# Mean usage per record: total usage divided by the number of generated rows.
# Uses the notebook's `rows` constant (set in the data-generation cell) rather
# than repeating the literal 5_000_000, so the average stays correct if the
# dataset size is changed.
print("Average data used per user (approx.):", round(total_usage / rows, 2), "GB")

Average data used per user (approx.): 50.05 GB
