The CSV file format is a popular format supported by many machine learning frameworks. The format is variously referred to "comma-separated values" or "character-separated values."

A CSV file stores tabular data (numbers and text) in plain text form. A CSV file consists of any number of records, separated by line breaks of some kind. Each record consists of fields, separated by a literal comma. In some regions, the separator might be a semi-colon.

Typically, all records have an identical number of fields, and missing values are represented as nulls or empty strings. There are a number of ways to load a CSV file in Python. 


In [70]:
import pandas as pd
from io import StringIO
import requests

In [71]:
df = pd.read_csv('place.csv')

df.head()

Unnamed: 0.1,Unnamed: 0,cgpa,iq,placement
0,0,6.8,123.0,1
1,1,5.9,106.0,0
2,2,5.3,121.0,0
3,3,7.4,132.0,1
4,4,5.8,142.0,0


In [72]:
import requests
from io import StringIO
import pandas as pd

url = "https://people.sc.fsu.edu/~jburkardt/data/csv/hw_200.csv"
headers = {
    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.14; rv:66.0) Gecko/20100101 Firefox/66.0"
}

req = requests.get(url, headers=headers)
print( 'req :', req)

data = StringIO(req.text)
print( 'data :', data)

df = pd.read_csv(data)

df

req : <Response [200]>
data : <_io.StringIO object at 0x000001CF38D45870>


Unnamed: 0,Index,"Height(Inches)""","""Weight(Pounds)"""
0,1,65.78,112.99
1,2,71.52,136.49
2,3,69.40,153.03
3,4,68.22,142.34
4,5,67.79,144.30
...,...,...,...
195,196,65.80,120.84
196,197,66.11,115.78
197,198,68.24,128.30
198,199,68.02,127.47


In [73]:
df = pd.read_csv('tsv.tsv', sep='\t', names=['Name','Age','City Name'])

df

Unnamed: 0,Name,Age,City Name
0,Name,Age,City
1,Alice,25,New York
2,Bob,30,Los Angeles
3,Charlie,35,Chicago


In [74]:
df = pd.read_csv('place.csv', index_col='Unnamed: 0')

df

Unnamed: 0,cgpa,iq,placement
0,6.8,123.0,1
1,5.9,106.0,0
2,5.3,121.0,0
3,7.4,132.0,1
4,5.8,142.0,0
...,...,...,...
95,4.3,200.0,0
96,4.4,42.0,0
97,6.7,182.0,1
98,6.3,103.0,1


In [75]:
df = pd.read_csv('place.csv', header=1)

df

Unnamed: 0,0,6.8,123.0,1
0,1,5.9,106.0,0
1,2,5.3,121.0,0
2,3,7.4,132.0,1
3,4,5.8,142.0,0
4,5,7.1,48.0,1
...,...,...,...,...
94,95,4.3,200.0,0
95,96,4.4,42.0,0
96,97,6.7,182.0,1
97,98,6.3,103.0,1


In [76]:
df = pd.read_csv('place.csv', usecols=['cgpa','iq'])

df

Unnamed: 0,cgpa,iq
0,6.8,123.0
1,5.9,106.0
2,5.3,121.0
3,7.4,132.0
4,5.8,142.0
...,...,...
95,4.3,200.0
96,4.4,42.0
97,6.7,182.0
98,6.3,103.0


In [77]:
df = pd.read_csv(
    'place.csv',
    skiprows=[0,2,4,5]
)

df

Unnamed: 0,0,6.8,123.0,1
0,2,5.3,121.0,0
1,5,7.1,48.0,1
2,6,5.7,143.0,0
3,7,5.0,63.0,0
4,8,6.1,156.0,0
...,...,...,...,...
91,95,4.3,200.0,0
92,96,4.4,42.0,0
93,97,6.7,182.0,1
94,98,6.3,103.0,1


In [78]:
df = pd.read_csv(
    'place.csv',
    nrows=10
)

df

Unnamed: 0.1,Unnamed: 0,cgpa,iq,placement
0,0,6.8,123.0,1
1,1,5.9,106.0,0
2,2,5.3,121.0,0
3,3,7.4,132.0,1
4,4,5.8,142.0,0
5,5,7.1,48.0,1
6,6,5.7,143.0,0
7,7,5.0,63.0,0
8,8,6.1,156.0,0
9,9,5.1,66.0,0


In [79]:
df = pd.read_csv(
    'place.csv',
    encoding='latin-1'
)

df

Unnamed: 0.1,Unnamed: 0,cgpa,iq,placement
0,0,6.8,123.0,1
1,1,5.9,106.0,0
2,2,5.3,121.0,0
3,3,7.4,132.0,1
4,4,5.8,142.0,0
...,...,...,...,...
95,95,4.3,200.0,0
96,96,4.4,42.0,0
97,97,6.7,182.0,1
98,98,6.3,103.0,1


In [80]:
df = pd.read_csv(
    'place.csv',
    dtype={
        'iq': int,
    }
)

print(df.info())

df

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 4 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   Unnamed: 0  100 non-null    int64  
 1   cgpa        100 non-null    float64
 2   iq          100 non-null    int32  
 3   placement   100 non-null    int64  
dtypes: float64(1), int32(1), int64(2)
memory usage: 2.9 KB
None


Unnamed: 0.1,Unnamed: 0,cgpa,iq,placement
0,0,6.8,123,1
1,1,5.9,106,0
2,2,5.3,121,0
3,3,7.4,132,1
4,4,5.8,142,0
...,...,...,...,...
95,95,4.3,200,0
96,96,4.4,42,0
97,97,6.7,182,1
98,98,6.3,103,1


In [81]:
df = pd.read_csv(
    'place.csv',
    parse_dates=['iq']
)

print( df.info())

df

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 4 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   Unnamed: 0  100 non-null    int64  
 1   cgpa        100 non-null    float64
 2   iq          100 non-null    object 
 3   placement   100 non-null    int64  
dtypes: float64(1), int64(2), object(1)
memory usage: 3.2+ KB
None


  df = pd.read_csv(


Unnamed: 0.1,Unnamed: 0,cgpa,iq,placement
0,0,6.8,123.0,1
1,1,5.9,106.0,0
2,2,5.3,121.0,0
3,3,7.4,132.0,1
4,4,5.8,142.0,0
...,...,...,...,...
95,95,4.3,200.0,0
96,96,4.4,42.0,0
97,97,6.7,182.0,1
98,98,6.3,103.0,1


In [82]:
def newPlacement( x ):
    # print(x)
    if x == "1":
        return "Pass"
    else:
        return "Fail"

df = pd.read_csv(
    'place.csv',
    converters={
        'placement' : newPlacement
    }
)

print( df.info())

df

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 4 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   Unnamed: 0  100 non-null    int64  
 1   cgpa        100 non-null    float64
 2   iq          100 non-null    float64
 3   placement   100 non-null    object 
dtypes: float64(2), int64(1), object(1)
memory usage: 3.2+ KB
None


Unnamed: 0.1,Unnamed: 0,cgpa,iq,placement
0,0,6.8,123.0,Pass
1,1,5.9,106.0,Fail
2,2,5.3,121.0,Fail
3,3,7.4,132.0,Pass
4,4,5.8,142.0,Fail
...,...,...,...,...
95,95,4.3,200.0,Fail
96,96,4.4,42.0,Fail
97,97,6.7,182.0,Pass
98,98,6.3,103.0,Pass


In [83]:
df = pd.read_csv(
    'place.csv',
    na_values=[0]
)

df

Unnamed: 0.1,Unnamed: 0,cgpa,iq,placement
0,,6.8,123.0,1.0
1,1.0,5.9,106.0,
2,2.0,5.3,121.0,
3,3.0,7.4,132.0,1.0
4,4.0,5.8,142.0,
...,...,...,...,...
95,95.0,4.3,200.0,
96,96.0,4.4,42.0,
97,97.0,6.7,182.0,1.0
98,98.0,6.3,103.0,1.0


In [89]:
dfs = pd.read_csv(
    'place.csv',
    chunksize=30
)

for df in dfs:
    print(df.shape)
    # print(df)

(30, 4)
(30, 4)
(30, 4)
(10, 4)
