In [None]:
import pandas as pd

#### **1**. **Loading** **Dataset**

In [None]:
# Load a CSV file from a local path into a DataFrame using the default settings.
df = pd.read_csv('/content/sample_data/california_housing_test.csv')

In [None]:
# Preview the first rows (head() shows 5 by default) to check the columns parsed as expected.
df.head()

Unnamed: 0,longitude,latitude,housing_median_age,total_rooms,total_bedrooms,population,households,median_income,median_house_value
0,-122.05,37.37,27.0,3885.0,661.0,1537.0,606.0,6.6085,344700.0
1,-118.3,34.26,43.0,1510.0,310.0,809.0,277.0,3.599,176500.0
2,-117.81,33.78,27.0,3589.0,507.0,1484.0,495.0,5.7934,270500.0
3,-118.36,33.82,28.0,67.0,15.0,49.0,11.0,6.1359,330000.0
4,-119.67,36.33,19.0,1241.0,244.0,850.0,237.0,2.9375,81700.0


#### 2. **Loading data from URL**

In [None]:
# read_csv is given a URL here instead of a local file path.
df = pd.read_csv('https://raw.githubusercontent.com/datasets/covid-19/refs/heads/main/data/us_deaths.csv')

In [None]:
# Preview the first rows of the data loaded from the URL.
df.head()

Unnamed: 0,Admin2,Date,Case,Country/Region,Province/State
0,Autauga,2020-01-22,0,US,Alabama
1,Autauga,2020-01-23,0,US,Alabama
2,Autauga,2020-01-24,0,US,Alabama
3,Autauga,2020-01-25,0,US,Alabama
4,Autauga,2020-01-26,0,US,Alabama


#### sep Parameter

In [None]:
# The file is tab-separated, so pass sep='\t' to split fields on tabs rather than the default comma.
df = pd.read_csv('/content/titanic.tsv', sep = '\t')

In [None]:
# Preview the first rows to confirm the tab-separated fields were split into separate columns.
df.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22,1,0,A/5 21171,7.25,\N,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26,0,0,STON/O2. 3101282,7.925,\N,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35,0,0,373450,8.05,\N,S


##### If the first data row is wrongly being used as the column names (i.e. the file has no header row), use the `names` parameter and pass the list of column names

#### 3. `index_col` **Parameter**
Uses the given column as the row index instead of the default integer index.

In [None]:
# Use the 'longitude' column as the row index instead of the default 0..n-1 integer index.
df = pd.read_csv('/content/sample_data/california_housing_test.csv', index_col = 'longitude')

In [None]:
# Preview the first rows; 'longitude' should now appear as the index rather than a regular column.
df.head()

Unnamed: 0_level_0,latitude,housing_median_age,total_rooms,total_bedrooms,population,households,median_income,median_house_value
longitude,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
-122.05,37.37,27.0,3885.0,661.0,1537.0,606.0,6.6085,344700.0
-118.3,34.26,43.0,1510.0,310.0,809.0,277.0,3.599,176500.0
-117.81,33.78,27.0,3589.0,507.0,1484.0,495.0,5.7934,270500.0
-118.36,33.82,28.0,67.0,15.0,49.0,11.0,6.1359,330000.0
-119.67,36.33,19.0,1241.0,244.0,850.0,237.0,2.9375,81700.0


#### **4. `header` Parameter**
Row number(s) to use as the column names and as the start of the data; accepts an int or a list of ints. With `header=None` (and no `names` passed), the columns are labelled 0, 1, 2, and so on.

#### **5. `usecols` Parameter**
Reads only the selected columns from the CSV file.

In [None]:
# Load only the 'longitude' and 'latitude' columns; all other columns in the file are skipped.
df = pd.read_csv('/content/sample_data/california_housing_test.csv', usecols = ['longitude', 'latitude'])

In [None]:
# Preview the first rows; only the two selected columns should be present.
df.head()

Unnamed: 0,longitude,latitude
0,-122.05,37.37
1,-118.3,34.26
2,-117.81,33.78
3,-118.36,33.82
4,-119.67,36.33


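#### **6. `squeeze` Parameter**
Used to automatically convert a DataFrame to a Series if only one column is present in the data. Note: this parameter was deprecated in pandas 1.4 and removed in pandas 2.0; use `pd.read_csv(...).squeeze("columns")` instead.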

#### **7. `skiprows` Parameter**
offers a way to omit specific rows when reading data from a CSV file

#### **8. `nrows` Parameter**
import only n rows from the dataset

#### **9. `encoding` Parameter**
Specifies the text encoding used to decode the file (for example `encoding='latin-1'`) when it is not UTF-8.


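#### **10. `error_bad_lines` Parameter**
Skips lines with too many fields when set to `False`. Note: this parameter was deprecated in pandas 1.3 and removed in pandas 2.0; use `on_bad_lines='skip'` instead.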





#### **11. `dtype` Parameter**

the dtype parameter lets you specify the data type of each column while
reading a CSV file.


#### **12. `parse_dates` Parameter**

The parse_dates parameter in read_csv() is used to automatically convert columns containing date and time strings into datetime64 objects in pandas.

#### **13. Converters**


In [None]:
def rename(name):
    """Return "Alb" when *name* is exactly "Alabama"; otherwise return *name* unchanged.

    Written to be passed to ``pd.read_csv`` through ``converters``, which
    calls it once for each value of the chosen column.
    """
    return "Alb" if name == "Alabama" else name

In [None]:
# converters maps a column name to a function; here `rename` is applied to each 'Province/State' value.
df = pd.read_csv('https://raw.githubusercontent.com/datasets/covid-19/refs/heads/main/data/us_deaths.csv', converters= {'Province/State':rename})

In [None]:
# Preview the first rows; 'Province/State' should show the converted value ("Alb") for Alabama rows.
df.head()

Unnamed: 0,Admin2,Date,Case,Country/Region,Province/State
0,Autauga,2020-01-22,0,US,Alb
1,Autauga,2020-01-23,0,US,Alb
2,Autauga,2020-01-24,0,US,Alb
3,Autauga,2020-01-25,0,US,Alb
4,Autauga,2020-01-26,0,US,Alb


#### **14. `na_values` Parameter**

The na_values parameter in read_csv() is used to specify custom values that should be treated as NaN (missing values) when reading a CSV file.

####  **15. Loading huge dataset in chunks**

`chunksize`: The chunksize parameter in read_csv() allows you to read large CSV files in small chunks (batches) instead of loading the entire file into memory at once. This is useful for handling very large files that may not fit into memory.

In [None]:
# With chunksize=100, read_csv does not return one DataFrame; `df` is an iterable
# that yields the file in pieces of up to 100 rows each.
df = pd.read_csv('/content/sample_data/california_housing_test.csv', chunksize = 100)

In [None]:
# Iterate over the chunked reader; each item is one chunk, and its (rows, columns) shape is printed.
for chunks in df:
  print(chunks.shape)

(100, 9)
(100, 9)
(100, 9)
(100, 9)
(100, 9)
(100, 9)
(100, 9)
(100, 9)
(100, 9)
(100, 9)
(100, 9)
(100, 9)
(100, 9)
(100, 9)
(100, 9)
(100, 9)
(100, 9)
(100, 9)
(100, 9)
(100, 9)
(100, 9)
(100, 9)
(100, 9)
(100, 9)
(100, 9)
(100, 9)
(100, 9)
(100, 9)
(100, 9)
(100, 9)
