In [None]:
import pandas as pd

In [None]:
df = pd.read_csv("data/Mercedesbenz.csv")

# Peek few rows of a DataFrame
df.head()

Quick facts sheet for your DataFrame — it tells you:
- How many rows and columns there are
- Each column’s name and data type (int64, float64, object, etc.)
- How many non-missing values each column has
- Rough memory usage

In [None]:
df.info()

Provides summary statistics tool it gives you quick math facts about your numeric columns (and sometimes non-numeric, if you ask).

- By default, it works on numeric data and shows:
- count: how many non-missing values
- mean: average
- std: standard deviation (spread of values)
- min: smallest value
- 25%, 50%, 75%: quartiles (Q1, median, Q3) 
- max  largest value

In [None]:
df.describe()

In [20]:
# Get the unique category counts
df['X0'].value_counts()

X0
z     360
ak    349
y     324
ay    313
t     306
x     300
o     269
f     227
n     195
w     182
j     181
az    175
aj    151
s     106
ap    103
h      75
d      73
al     67
v      36
af     35
ai     34
m      34
e      32
ba     27
at     25
a      21
ax     19
aq     18
i      18
am     18
u      17
aw     16
l      16
ad     14
k      11
au     11
b      11
r      10
as     10
bc      6
ao      4
c       3
q       2
aa      2
ac      1
g       1
ab      1
Name: count, dtype: int64

In [26]:
# Return column y, where value is greater than 150
df[df['y']>150]

Unnamed: 0,ID,y,X0,X1,X2,X3,X4,X5,X6,X8,...,X375,X376,X377,X378,X379,X380,X382,X383,X384,X385
253,505,150.43,t,b,as,c,d,i,l,x,...,0,0,1,0,0,0,0,1,0,0
342,681,169.91,aa,l,ak,f,d,i,c,d,...,0,0,0,0,0,0,0,0,0,0
429,836,154.87,ak,l,ae,f,d,d,g,w,...,0,0,0,0,0,0,0,0,0,0
883,1770,265.32,y,r,ai,f,d,ag,l,t,...,0,0,0,0,0,0,0,0,0,0
889,1784,158.53,aj,l,as,f,d,ag,k,e,...,0,0,0,0,0,0,0,0,0,0
1060,2111,154.43,w,v,r,c,d,ag,d,q,...,1,0,0,0,0,0,0,0,0,0
1203,2396,160.87,j,o,as,f,d,ab,g,p,...,1,0,0,0,0,0,0,0,0,0
1205,2403,150.89,x,b,m,c,d,ab,j,j,...,0,0,1,0,0,0,0,0,0,0
1269,2511,152.32,s,aa,m,c,d,ab,g,g,...,1,0,0,0,0,0,0,0,0,0
1459,2903,167.45,ai,b,ae,a,d,ac,g,m,...,0,0,1,0,0,0,0,0,0,0


In [29]:
from io import StringIO, BytesIO

# io.StringIO 
- is like a pretend file in Python — it lets you treat a string as if it were a file you could read from or write to.
- It comes from Python’s built-in io module:

In [30]:
data = ('Col1,Col2,Col3\n'
        'x,y,1\n'
        'a,b,2\n'
        'u,v,7\n')

print(type(data)) # str

pd.read_csv(StringIO(data))

<class 'str'>


Unnamed: 0,Col1,Col2,Col3
0,x,y,1
1,a,b,2
2,u,v,7


In [None]:
# Select specified Columns
pd.read_csv(StringIO(data), usecols=['Col1', 'Col3'])

Unnamed: 0,Col1,Col3
0,x,1
1,a,2
2,u,7


In [47]:
data_2 = (
    'a,b,c,d\n'
    '1,2,3,4\n'
    '5,6,7,8\n'
    '9,10,11\n')

# Specify the data type rows. such object i.e. string
data_read2 = pd.read_csv(StringIO(data_2), dtype=object) 
data_read2


data_read2['a'][1]

'5'

In [46]:
data_3= (
    'a,b,c,d\n'
    '1,2,3,4\n'
    '5,6,7,8\n'
    '9,10,11\n')

data_read3 = pd.read_csv(StringIO(data_3), dtype={'a': int, 'b': float, 'c': int, 'd': object})
data_read3['a'][1]

np.int64(5)

In [51]:
#Specify first Column as index
data_4= (
    'index,b,c,d\n'
    '1,2,3,4\n'
    '5,6,7,8\n'
    '9,10,11, 12\n')

data_read4 = pd.read_csv(StringIO(data_3), dtype={'a': int, 'b': float, 'c': int, 'd': object})
data_read4
# 0 is Index. Make first column as index
data_read5 = pd.read_csv(StringIO(data_4), index_col=0)
data_read5

Unnamed: 0_level_0,b,c,d
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,2,3,4
5,6,7,8
9,10,11,12


In [None]:
data_5 = ('a,b,c\n'
    '4,Banana,Sun,\n'
    '5,Apple,Saturn,\n')

data_read5 = pd.read_csv(StringIO(data_5))
data_read5

# Do not use first column as Index
data_read6 = pd.read_csv(StringIO(data_5), index_col=False)
data_read6

# useColumns and skip index
data_read7 = pd.read_csv(StringIO(data_5), usecols=['b', 'c'], index_col=False)
data_read7

Unnamed: 0,b,c
0,Banana,Sun
1,Apple,Saturn


In [65]:
## Quoting and Escape Characters
data8 = 'a,b\n "Hello \"Sam\", "Howdy?'

data_read8 = pd.read_csv(StringIO(data8), escapechar='\\')
data_read8


Unnamed: 0,a,b
0,"""Hello ""Sam""","""Howdy?"


In [None]:
# Read data from URL
data9 = pd.read_csv('https://download.bls.gov/pub/time.series/cu/cu.item', sep='\t')
data9.head()

HTTPError: HTTP Error 403: Forbidden