# HANDLING CSV FILES.

In [1]:
import pandas as pd
import numpy as np
import io

# Inline CSV text standing in for a healthcare data file (header row + five patients)
csv_data = """PatientID,Name,Age,Gender,Diagnosis,BloodPressure,HeartRate
P001,John Smith,45,Male,Hypertension,140/90,80
P002,Emily Davis,38,Female,Diabetes,130/85,76
P003,Michael Brown,50,Male,Cardiovascular Disease,150/95,85
P004,Sarah Wilson,29,Female,Asthma,120/80,72
P005,David Lee,60,Male,Arthritis,135/88,78
"""

# Wrap the text in an in-memory buffer so pandas can parse it like a file on disk
data = pd.read_csv(filepath_or_buffer=io.StringIO(csv_data))
print(data)


  PatientID           Name  Age  Gender               Diagnosis BloodPressure  \
0      P001     John Smith   45    Male            Hypertension        140/90   
1      P002    Emily Davis   38  Female                Diabetes        130/85   
2      P003  Michael Brown   50    Male  Cardiovascular Disease        150/95   
3      P004   Sarah Wilson   29  Female                  Asthma        120/80   
4      P005      David Lee   60    Male               Arthritis        135/88   

   HeartRate  
0         80  
1         76  
2         85  
3         72  
4         78  


In [3]:
# Parse the CSV-formatted text through an in-memory buffer to get a DataFrame
df = pd.read_csv(filepath_or_buffer=io.StringIO(csv_data))

# Persist the DataFrame as 'patient_data.csv'; index=False leaves the row labels out
df.to_csv(path_or_buf="patient_data.csv", index=False)

In [7]:
# Quick preview of the dataset: head() with n=5 returns the first five records
preview_rows = data.head(n=5)
print(preview_rows)

  PatientID           Name  Age  Gender               Diagnosis BloodPressure  \
0      P001     John Smith   45    Male            Hypertension        140/90   
1      P002    Emily Davis   38  Female                Diabetes        130/85   
2      P003  Michael Brown   50    Male  Cardiovascular Disease        150/95   
3      P004   Sarah Wilson   29  Female                  Asthma        120/80   
4      P005      David Lee   60    Male               Arthritis        135/88   

   HeartRate  
0         80  
1         76  
2         85  
3         72  
4         78  


# EXTRACTING CSV INTO PANDAS DATAFRAME.

In [9]:
# Show a summary of the dataset: column dtypes, non-null counts, memory usage, etc.
# DataFrame.info() writes the report itself and returns None, so it is called
# directly; wrapping it in print() would append a stray "None" line to the output.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5 entries, 0 to 4
Data columns (total 7 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   PatientID      5 non-null      object
 1   Name           5 non-null      object
 2   Age            5 non-null      int64 
 3   Gender         5 non-null      object
 4   Diagnosis      5 non-null      object
 5   BloodPressure  5 non-null      object
 6   HeartRate      5 non-null      int64 
dtypes: int64(2), object(5)
memory usage: 412.0+ bytes
None


In [11]:
# shape is a (rows, columns) tuple; unpack it and print it back as a tuple
n_rows, n_cols = data.shape
print((n_rows, n_cols))

(5, 7)


In [22]:
# Descriptive statistics (count, mean, std, min, quartiles, max) of the dataset
summary_stats = data.describe()
print(summary_stats)

             Age  HeartRate
count   5.000000   5.000000
mean   44.400000  78.200000
std    11.760102   4.816638
min    29.000000  72.000000
25%    38.000000  76.000000
50%    45.000000  78.000000
75%    50.000000  80.000000
max    60.000000  85.000000


In [24]:
# The column labels of the DataFrame, returned as a pandas Index
column_labels = data.columns
print(column_labels)

Index(['PatientID', 'Name', 'Age', 'Gender', 'Diagnosis', 'BloodPressure',
       'HeartRate'],
      dtype='object')


In [26]:
# Print the DataFrame's contents as a raw array: one inner list per row, no labels
raw_values = data.values
print(raw_values)

[['P001' 'John Smith' 45 'Male' 'Hypertension' '140/90' 80]
 ['P002' 'Emily Davis' 38 'Female' 'Diabetes' '130/85' 76]
 ['P003' 'Michael Brown' 50 'Male' 'Cardiovascular Disease' '150/95' 85]
 ['P004' 'Sarah Wilson' 29 'Female' 'Asthma' '120/80' 72]
 ['P005' 'David Lee' 60 'Male' 'Arthritis' '135/88' 78]]


# EXCEPTION HANDLING WHILE EXTRACTING.

In [31]:
# Extract the contents of the CSV file into df using pandas, handling read errors
import pandas as pd


def load_patient_csv(path):
    """Read the CSV file at ``path`` into a DataFrame, reporting failures.

    Args:
        path: Location of the CSV file to read.

    Returns:
        The DataFrame produced by ``pd.read_csv``, or ``None`` when the file
        is missing or any other exception is raised while reading it (a
        message is printed in both failure cases).
    """
    try:
        return pd.read_csv(path)  # reading csv file
    except FileNotFoundError:
        print("File not found at the location")  # handling errors in a customised way
    except Exception as e:  # catch-all for every other failure, e.g. an empty file
        print(f"Error occured: {e}")  # displaying the error message
    return None


loaded = load_patient_csv("patient_data.csv")
if loaded is not None:
    # Rebind df only on success so a failed read leaves any earlier df untouched
    df = loaded
    print(df.head())  # displaying top 5 rows

  PatientID           Name  Age  Gender               Diagnosis BloodPressure  \
0      P001     John Smith   45    Male            Hypertension        140/90   
1      P002    Emily Davis   38  Female                Diabetes        130/85   
2      P003  Michael Brown   50    Male  Cardiovascular Disease        150/95   
3      P004   Sarah Wilson   29  Female                  Asthma        120/80   
4      P005      David Lee   60    Male               Arthritis        135/88   

   HeartRate  
0         80  
1         76  
2         85  
3         72  
4         78  


# 1. READ A CSV FILE

In [13]:
import pandas as pd

# Read the whole patient file from disk into a single DataFrame
df = pd.read_csv(filepath_or_buffer="patient_data.csv")

# Print every row of the DataFrame (fine here because the file is tiny)
print(df)


  PatientID           Name  Age  Gender               Diagnosis BloodPressure  \
0      P001     John Smith   45    Male            Hypertension        140/90   
1      P002    Emily Davis   38  Female                Diabetes        130/85   
2      P003  Michael Brown   50    Male  Cardiovascular Disease        150/95   
3      P004   Sarah Wilson   29  Female                  Asthma        120/80   
4      P005      David Lee   60    Male               Arthritis        135/88   

   HeartRate  
0         80  
1         76  
2         85  
3         72  
4         78  


# 2. READ A CSV FILE CHUNK BY CHUNK

In [15]:
# Two rows per chunk keeps the demonstration readable on such a small dataset
chunk_size = 2

# With chunksize set, read_csv hands back an iterator of DataFrames instead of
# loading the whole file at once
patient_chunks = pd.read_csv("patient_data.csv", chunksize=chunk_size)

count = 1  # 1-based label of the chunk currently being printed
for chunk in patient_chunks:
    # One print call with a newline separator emits the label, then the chunk
    print(f"Patient Data Chunk {count}:", chunk, sep="\n")
    count += 1


Patient Data Chunk 1:
  PatientID         Name  Age  Gender     Diagnosis BloodPressure  HeartRate
0      P001   John Smith   45    Male  Hypertension        140/90         80
1      P002  Emily Davis   38  Female      Diabetes        130/85         76
Patient Data Chunk 2:
  PatientID           Name  Age  Gender               Diagnosis BloodPressure  \
2      P003  Michael Brown   50    Male  Cardiovascular Disease        150/95   
3      P004   Sarah Wilson   29  Female                  Asthma        120/80   

   HeartRate  
2         85  
3         72  
Patient Data Chunk 3:
  PatientID       Name  Age Gender  Diagnosis BloodPressure  HeartRate
4      P005  David Lee   60   Male  Arthritis        135/88         78


# ALTERNATE METHOD

In [44]:
import pandas as pd

# Open the patient file as a chunked reader (2 rows per chunk) inside a with-block
with pd.read_csv("patient_data.csv", chunksize=2) as reader:
    # Printing the reader shows the reader object itself, not any rows
    print(reader)

    # Each iteration yields the next chunk as a DataFrame; show its first 2 rows
    for chunk in reader:
        leading_rows = chunk.head(n=2)
        print(leading_rows)


<pandas.io.parsers.readers.TextFileReader object at 0x000001E8823D1E80>
  PatientID         Name  Age  Gender     Diagnosis BloodPressure  HeartRate
0      P001   John Smith   45    Male  Hypertension        140/90         80
1      P002  Emily Davis   38  Female      Diabetes        130/85         76
  PatientID           Name  Age  Gender               Diagnosis BloodPressure  \
2      P003  Michael Brown   50    Male  Cardiovascular Disease        150/95   
3      P004   Sarah Wilson   29  Female                  Asthma        120/80   

   HeartRate  
2         85  
3         72  
  PatientID          Name  Age  Gender  Diagnosis BloodPressure  HeartRate
4      P005     David Lee   60    Male  Arthritis        135/88         78
5      P006  Laura Martin   55  Female   Diabetes        128/85         79
  PatientID         Name  Age Gender     Diagnosis BloodPressure  HeartRate
6      P007  James White   42   Male  Hypertension        145/92         82


# USAGE OF ENUMERATE METHOD FOR CHUNKING

In [48]:
import pandas as pd

# Small chunk size for demonstration purposes
chunk_size = 2

# Iterator over the patient file that yields chunk_size rows at a time
chunk_iter = pd.read_csv("patient_data.csv", chunksize=chunk_size)

# enumerate supplies a 0-based counter, so the label printed is i + 1
for i, chunk in enumerate(chunk_iter):
    print(f"\n--- Patient Data Chunk {i + 1} ---")
    leading_rows = chunk.head(n=2)  # first two records of the current chunk
    print(leading_rows)



--- Patient Data Chunk 1 ---
  PatientID         Name  Age  Gender     Diagnosis BloodPressure  HeartRate
0      P001   John Smith   45    Male  Hypertension        140/90         80
1      P002  Emily Davis   38  Female      Diabetes        130/85         76

--- Patient Data Chunk 2 ---
  PatientID           Name  Age  Gender               Diagnosis BloodPressure  \
2      P003  Michael Brown   50    Male  Cardiovascular Disease        150/95   
3      P004   Sarah Wilson   29  Female                  Asthma        120/80   

   HeartRate  
2         85  
3         72  

--- Patient Data Chunk 3 ---
  PatientID          Name  Age  Gender  Diagnosis BloodPressure  HeartRate
4      P005     David Lee   60    Male  Arthritis        135/88         78
5      P006  Laura Martin   55  Female   Diabetes        128/85         79

--- Patient Data Chunk 4 ---
  PatientID         Name  Age Gender     Diagnosis BloodPressure  HeartRate
6      P007  James White   42   Male  Hypertension        145/92         82

# 3. APPEND TO A CSV

In [34]:
import pandas as pd

# New patient records, one list per column, in the same column order as the file
new_data = dict(
    PatientID=["P006", "P007"],
    Name=["Laura Martin", "James White"],
    Age=[55, 42],
    Gender=["Female", "Male"],
    Diagnosis=["Diabetes", "Hypertension"],
    BloodPressure=["128/85", "145/92"],
    HeartRate=[79, 82],
)

new_df = pd.DataFrame(data=new_data)

# mode="a" appends to the existing patient file; header=False avoids writing a
# second header row and index=False leaves the row labels out
new_df.to_csv(path_or_buf="patient_data.csv", mode="a", header=False, index=False)

In [36]:
# tail(n=2) keeps the final two rows of the newly created DataFrame
last_two_new = new_df.tail(n=2)
# A single print with a newline separator emits the caption and then the rows
print("Last 2 newly created patient records:", last_two_new, sep="\n")

Last 2 newly created patient records:
  PatientID          Name  Age  Gender     Diagnosis BloodPressure  HeartRate
0      P006  Laura Martin   55  Female      Diabetes        128/85         79
1      P007   James White   42    Male  Hypertension        145/92         82


In [38]:
# Positional slice from the second-to-last row through the end of the frame
tail_positions = slice(-2, None)
last_two_new_alt = new_df.iloc[tail_positions]
print("\nLast 2 records accessed by index position:", last_two_new_alt, sep="\n")


Last 2 records accessed by index position:
  PatientID          Name  Age  Gender     Diagnosis BloodPressure  HeartRate
0      P006  Laura Martin   55  Female      Diabetes        128/85         79
1      P007   James White   42    Male  Hypertension        145/92         82


In [40]:
# Stand-in for re-reading the file after the append: stack the previously loaded
# frame on top of the new records; ignore_index=True renumbers the rows from 0
frames_to_stack = [df, new_df]
full_data = pd.concat(frames_to_stack, ignore_index=True)

In [42]:
# Select the newly added rows through a boolean mask on their PatientID values
new_patient_ids = ["P006", "P007"]
is_new_patient = full_data["PatientID"].isin(new_patient_ids)
access_rows = full_data.loc[is_new_patient]
print("\nNewly added patient records accessed by PatientID:", access_rows, sep="\n")


Newly added patient records accessed by PatientID:
  PatientID          Name  Age  Gender     Diagnosis BloodPressure  HeartRate
5      P006  Laura Martin   55  Female      Diabetes        128/85         79
6      P007   James White   42    Male  Hypertension        145/92         82


# 4. WRITE NUMERIC DATA INTO CSV FILE

In [51]:
import pandas as pd
import numpy as np

# Seeded generator so the simulated readings are identical on every run of the
# notebook (unseeded random calls produce a different table each time)
SEED = 42
rng = np.random.default_rng(SEED)

# Create a dataset simulating sensor measurements
sensor_data = pd.DataFrame({
    "SensorID": np.arange(101, 111),  # Sensor IDs from 101 to 110
    "Temperature_C": rng.integers(20, 35, size=10),  # whole degrees Celsius in [20, 35)
    "Humidity_%": rng.uniform(30.0, 90.0, size=10).round(2),  # percentage, 2 decimals
})

# Save the sensor data to a CSV file (index=False leaves the row labels out)
sensor_data.to_csv("sensor_data.csv", index=False)

print("Sensor data CSV file created successfully!")
print(sensor_data)

Sensor data CSV file created successfully!
   SensorID  Temperature_C  Humidity_%
0       101             26       62.29
1       102             31       89.92
2       103             28       79.48
3       104             30       36.37
4       105             34       48.26
5       106             31       60.92
6       107             34       86.46
7       108             24       31.72
8       109             23       83.99
9       110             33       75.86


# 5. WRITE TEXT DATA INTO CSV FILE

In [53]:
import pandas as pd

# Book details, one list per column; entries at the same position describe one book
titles = ["The Alchemist", "1984", "To Kill a Mockingbird", "The Great Gatsby", "Moby Dick"]
authors = ["Paulo Coelho", "George Orwell", "Harper Lee", "F. Scott Fitzgerald", "Herman Melville"]
genres = ["Fiction", "Dystopian", "Classic", "Classic", "Adventure"]

book_data = pd.DataFrame({"Title": titles, "Author": authors, "Genre": genres})

# Write the book information to a CSV file; index=False leaves the row labels out
book_data.to_csv(path_or_buf="book_data.csv", index=False)

# A single print with a newline separator emits the message and then the table
print("Book data CSV written successfully!", book_data, sep="\n")

Book data CSV written successfully!
                   Title               Author      Genre
0          The Alchemist         Paulo Coelho    Fiction
1                   1984        George Orwell  Dystopian
2  To Kill a Mockingbird           Harper Lee    Classic
3       The Great Gatsby  F. Scott Fitzgerald    Classic
4              Moby Dick      Herman Melville  Adventure
