#Reading CSV files

In [8]:
#Read CSV Files into Pandas DataFrames
import pandas as pd

# Reading a simple csv file
df = pd.read_csv("Ice Cream Sales - temperatures.csv")

# Display the first few rows
print(df.head())

   Temperature  Ice Cream Profits
0           39              13.17
1           40              11.88
2           41              18.82
3           42              18.65
4           43              17.02


In [12]:
#Explore Different CSV Reading Options and Parameters
df = pd.read_csv("Ice Cream Sales - temperatures.csv", 
                 delimiter=",",   # Specify delimiter (e.g., comma or semicolon)
                 header=0,        # Specify which row contains the headers
                 skiprows=2,      # Skip first 2 rows
                 nrows=10)        # Read only the first 10 rows

print(df)

   40  11.88
0  41  18.82
1  42  18.65
2  43  17.02
3  43  15.88
4  44  19.07
5  44  19.57
6  45  21.62
7  45  22.34
8  45  19.23
9  46  21.25


In [16]:
#Handle Missing Values While Reading
# Reading CSV and specifying values to be treated as NaN (missing)
df = pd.read_csv("Ice Cream Sales - temperatures.csv", na_values=["", "NA", "n/a", "None"])

# Check for missing values
print(df.isnull().sum())  # Shows the number of missing values per column

# Filling missing values with a specific value (e.g., 0 or 'Unknown')
df_filled = df.fillna(0)

print(df_filled)

Temperature          0
Ice Cream Profits    0
dtype: int64
     Temperature  Ice Cream Profits
0             39              13.17
1             40              11.88
2             41              18.82
3             42              18.65
4             43              17.02
..           ...                ...
360           99              85.13
361           99              87.08
362           99              89.29
363          101              81.91
364          101              85.02

[365 rows x 2 columns]


In [18]:
#Data Cleaning and Dropping Rows/Columns
# Drop rows with any missing values
df_cleaned = df.dropna()

# Drop columns where all values are missing
df_cleaned_columns = df.dropna(axis=1, how="all")

print(df_cleaned.head())          # DataFrame with rows containing no missing values
print(df_cleaned_columns.head())  # DataFrame with columns containing no missing values

   Temperature  Ice Cream Profits
0           39              13.17
1           40              11.88
2           41              18.82
3           42              18.65
4           43              17.02
   Temperature  Ice Cream Profits
0           39              13.17
1           40              11.88
2           41              18.82
3           42              18.65
4           43              17.02


In [20]:
#Reading Large CSV Files in Chunks
# Read CSV file in chunks
chunk_size = 1000  # Number of rows per chunk
chunks = pd.read_csv("Ice Cream Sales - temperatures.csv", chunksize=chunk_size)

# Process each chunk
for chunk in chunks:
    print(chunk.head())  # Display the first few rows of each chunk

   Temperature  Ice Cream Profits
0           39              13.17
1           40              11.88
2           41              18.82
3           42              18.65
4           43              17.02
