### Pandas

- Series
- DataFrames
- Missing data
- Groupby
- Merging, Joining and Concatenating
- Operations
- Data Input and Output

#### Series

In [1]:
import numpy as np
import pandas as pd

In [2]:
# Shared sample inputs: the same three values as a list, an array and a dict.
labels = list('abc')
my_data = [10, 20, 30]
arr = np.asarray(my_data)
d = dict(zip(labels, my_data))

In [5]:
pd.Series(data = my_data)

0    10
1    20
2    30
dtype: int64

In [6]:
pd.Series(data = my_data, index=labels)

a    10
b    20
c    30
dtype: int64

In [7]:
pd.Series(my_data, labels)

a    10
b    20
c    30
dtype: int64

In [8]:
pd.Series(arr)

0    10
1    20
2    30
dtype: int64

In [9]:
pd.Series(d)

a    10
b    20
c    30
dtype: int64

In [11]:
pd.Series(data=labels) # A Series can hold any Python object, not just numbers (here: strings)

0    a
1    b
2    c
dtype: object

In [12]:
pd.Series(data = [sum,print,len])

0      <built-in function sum>
1    <built-in function print>
2      <built-in function len>
dtype: object

In [13]:
# Series labelled by country name rather than by the default integer positions.
ser1 = pd.Series(
    data=[1, 2, 3, 4],
    index=["USA", "Germany", "USSR", "Japan"],
)

In [14]:
ser1

USA        1
Germany    2
USSR       3
Japan      4
dtype: int64

In [15]:
# Second country-labelled Series; its labels only partly overlap with ser1's.
ser2 = pd.Series(
    data=[1, 2, 5, 4],
    index=["USA", "Germany", "Italy", "Japan"],
)

In [16]:
ser2

USA        1
Germany    2
Italy      5
Japan      4
dtype: int64

In [17]:
ser1["USA"]

np.int64(1)

In [18]:
ser1.index

Index(['USA', 'Germany', 'USSR', 'Japan'], dtype='object')

In [21]:
ser3 = pd.Series(data=labels)
ser3

0    a
1    b
2    c
dtype: object

In [20]:
ser3[0]

'a'

In [23]:
ser1 + ser2 # Labels present in only one Series give NaN; NaN is a float, so the whole result becomes float64

Germany    4.0
Italy      NaN
Japan      8.0
USA        2.0
USSR       NaN
dtype: float64

#### DataFrames

In [25]:
import numpy as np
import pandas as pd

In [26]:
from numpy.random import randn

In [27]:
np.random.seed(101)

In [29]:
# 5x4 frame of randn draws: rows labelled A-E, columns labelled W-Z.
df = pd.DataFrame(
    data=randn(5, 4),
    index=list('ABCDE'),
    columns=list('WXYZ'),
)

In [30]:
df

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [31]:
df['W']

A    2.706850
B    0.651118
C   -2.018168
D    0.188695
E    0.190794
Name: W, dtype: float64

In [34]:
type(df)

pandas.core.frame.DataFrame

In [35]:
type(df['W'])

pandas.core.series.Series

In [39]:
df[['W','X']]

Unnamed: 0,W,X
A,2.70685,0.628133
B,0.651118,-0.319318
C,-2.018168,0.740122
D,0.188695,-0.758872
E,0.190794,1.978757


In [41]:
df['new'] = df['W'] + df['Y']

In [42]:
df

Unnamed: 0,W,X,Y,Z,new
A,2.70685,0.628133,0.907969,0.503826,3.614819
B,0.651118,-0.319318,-0.848077,0.605965,-0.196959
C,-2.018168,0.740122,0.528813,-0.589001,-1.489355
D,0.188695,-0.758872,-0.933237,0.955057,-0.744542
E,0.190794,1.978757,2.605967,0.683509,2.796762


In [None]:
df.drop('new',axis=1) # axis=1 targets columns; drop returns a new frame and leaves df itself unchanged

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [47]:
df

Unnamed: 0,W,X,Y,Z,new
A,2.70685,0.628133,0.907969,0.503826,3.614819
B,0.651118,-0.319318,-0.848077,0.605965,-0.196959
C,-2.018168,0.740122,0.528813,-0.589001,-1.489355
D,0.188695,-0.758872,-0.933237,0.955057,-0.744542
E,0.190794,1.978757,2.605967,0.683509,2.796762


In [None]:
df.drop('E') # axis defaults to 0, so 'E' is matched against the row labels

Unnamed: 0,W,X,Y,Z,new
A,2.70685,0.628133,0.907969,0.503826,3.614819
B,0.651118,-0.319318,-0.848077,0.605965,-0.196959
C,-2.018168,0.740122,0.528813,-0.589001,-1.489355
D,0.188695,-0.758872,-0.933237,0.955057,-0.744542


In [50]:
df.shape

(5, 5)

In [None]:
df.loc['A'] # Select a single row by its index label

W      2.706850
X      0.628133
Y      0.907969
Z      0.503826
new    3.614819
Name: A, dtype: float64

In [55]:
df.iloc[0] # Select a single row by its integer position (0 is the first row)

W      2.706850
X      0.628133
Y      0.907969
Z      0.503826
new    3.614819
Name: A, dtype: float64

In [58]:
df.loc['B',"W"]

np.float64(0.6511179479432686)

In [60]:
df.iloc[1,0]

np.float64(0.6511179479432686)

In [61]:
df.loc[['A','B'],['W','Y']]

Unnamed: 0,W,Y
A,2.70685,0.907969
B,0.651118,-0.848077


In [62]:
df.iloc[[0,1],[0,2]]

Unnamed: 0,W,Y
A,2.70685,0.907969
B,0.651118,-0.848077


In [63]:
df > 0

Unnamed: 0,W,X,Y,Z,new
A,True,True,True,True,True
B,True,False,False,True,False
C,False,True,True,False,False
D,True,False,False,True,False
E,True,True,True,True,True


In [None]:
df[df>0] # Entries where the condition is False are replaced with NaN

Unnamed: 0,W,X,Y,Z,new
A,2.70685,0.628133,0.907969,0.503826,3.614819
B,0.651118,,,0.605965,
C,,0.740122,0.528813,,
D,0.188695,,,0.955057,
E,0.190794,1.978757,2.605967,0.683509,2.796762


In [66]:
df['W'] > 0

A     True
B     True
C    False
D     True
E     True
Name: W, dtype: bool

In [67]:
df[df['W']>0]

Unnamed: 0,W,X,Y,Z,new
A,2.70685,0.628133,0.907969,0.503826,3.614819
B,0.651118,-0.319318,-0.848077,0.605965,-0.196959
D,0.188695,-0.758872,-0.933237,0.955057,-0.744542
E,0.190794,1.978757,2.605967,0.683509,2.796762


In [68]:
df[df['Z']<0]

Unnamed: 0,W,X,Y,Z,new
C,-2.018168,0.740122,0.528813,-0.589001,-1.489355


In [69]:
resultDf = df[df['W']>0]

In [71]:
resultDf['X']

A    0.628133
B   -0.319318
D   -0.758872
E    1.978757
Name: X, dtype: float64

In [72]:
# Rows where W is positive, restricted to column X, in a single .loc lookup.
df.loc[df['W'] > 0, 'X']

A    0.628133
B   -0.319318
D   -0.758872
E    1.978757
Name: X, dtype: float64

In [73]:
# Rows where W is positive, columns Y and X (in that order), in one .loc lookup.
df.loc[df['W'] > 0, ['Y', 'X']]

Unnamed: 0,Y,X
A,0.907969,0.628133
B,-0.848077,-0.319318
D,-0.933237,-0.758872
E,2.605967,1.978757


In [81]:
# The same selection as a one-liner, spelled out step by step:
# pick the columns, build the row mask, filter the rows, then project.
mycols = ['Y', 'X']
boolser = df['W'].gt(0)
result = df.loc[boolser]
result.loc[:, mycols]

Unnamed: 0,Y,X
A,0.907969,0.628133
B,-0.848077,-0.319318
D,-0.933237,-0.758872
E,2.605967,1.978757


In [None]:
# Python's `and` needs a single True/False from each operand, but comparing a
# column yields a whole boolean Series, so pandas raises ValueError.
# The error is the point of this cell, so catch it and print the message;
# that way the notebook still runs top to bottom on a fresh kernel.
# Use the element-wise operators `&` / `|` to combine Series conditions.
try:
    df[(df['W']>0) and (df['Y'] > 1)]
except ValueError as exc:
    print(f"{type(exc).__name__}: {exc}")

ValueError: The truth value of a Series is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().

In [83]:
df[(df['W']>0) & (df['Y'] > 1)]

Unnamed: 0,W,X,Y,Z,new
E,0.190794,1.978757,2.605967,0.683509,2.796762


In [84]:
df[(df['W']>0) | (df['Y'] > 1)]

Unnamed: 0,W,X,Y,Z,new
A,2.70685,0.628133,0.907969,0.503826,3.614819
B,0.651118,-0.319318,-0.848077,0.605965,-0.196959
D,0.188695,-0.758872,-0.933237,0.955057,-0.744542
E,0.190794,1.978757,2.605967,0.683509,2.796762


In [86]:
df

Unnamed: 0,W,X,Y,Z,new
A,2.70685,0.628133,0.907969,0.503826,3.614819
B,0.651118,-0.319318,-0.848077,0.605965,-0.196959
C,-2.018168,0.740122,0.528813,-0.589001,-1.489355
D,0.188695,-0.758872,-0.933237,0.955057,-0.744542
E,0.190794,1.978757,2.605967,0.683509,2.796762


In [87]:
df.reset_index()

Unnamed: 0,index,W,X,Y,Z,new
0,A,2.70685,0.628133,0.907969,0.503826,3.614819
1,B,0.651118,-0.319318,-0.848077,0.605965,-0.196959
2,C,-2.018168,0.740122,0.528813,-0.589001,-1.489355
3,D,0.188695,-0.758872,-0.933237,0.955057,-0.744542
4,E,0.190794,1.978757,2.605967,0.683509,2.796762


In [88]:
df.index

Index(['A', 'B', 'C', 'D', 'E'], dtype='object')

In [89]:
newind = 'CA NY WY OR CO'.split()

In [90]:
newind

['CA', 'NY', 'WY', 'OR', 'CO']

In [91]:
df['States'] = newind

In [92]:
df

Unnamed: 0,W,X,Y,Z,new,States
A,2.70685,0.628133,0.907969,0.503826,3.614819,CA
B,0.651118,-0.319318,-0.848077,0.605965,-0.196959,NY
C,-2.018168,0.740122,0.528813,-0.589001,-1.489355,WY
D,0.188695,-0.758872,-0.933237,0.955057,-0.744542,OR
E,0.190794,1.978757,2.605967,0.683509,2.796762,CO


In [94]:
df.set_index('States')

Unnamed: 0_level_0,W,X,Y,Z,new
States,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
CA,2.70685,0.628133,0.907969,0.503826,3.614819
NY,0.651118,-0.319318,-0.848077,0.605965,-0.196959
WY,-2.018168,0.740122,0.528813,-0.589001,-1.489355
OR,0.188695,-0.758872,-0.933237,0.955057,-0.744542
CO,0.190794,1.978757,2.605967,0.683509,2.796762


In [None]:
# Index Levels
# Build a two-level (group, number) index. The intermediate list of tuples
# keeps its own name instead of being overwritten by the MultiIndex, and it is
# the cell's last expression, so it is what the cell displays.
outside = ['G1','G1','G1','G2','G2','G2']
inside = [1,2,3,1,2,3]
index_tuples = list(zip(outside,inside))
hier_index = pd.MultiIndex.from_tuples(index_tuples)
index_tuples

[('G1', 1), ('G1', 2), ('G1', 3), ('G2', 1), ('G2', 2), ('G2', 3)]


In [99]:
hier_index

MultiIndex([('G1', 1),
            ('G1', 2),
            ('G1', 3),
            ('G2', 1),
            ('G2', 2),
            ('G2', 3)],
           )

In [101]:
# 6x2 frame of randn draws, indexed by the two-level hier_index.
df = pd.DataFrame(
    data=randn(6, 2),
    index=hier_index,
    columns=['A', 'B'],
)

In [102]:
df

Unnamed: 0,Unnamed: 1,A,B
G1,1,-0.497104,-0.75407
G1,2,-0.943406,0.484752
G1,3,-0.116773,1.901755
G2,1,0.238127,1.996652
G2,2,-0.993263,0.1968
G2,3,-1.136645,0.000366


In [105]:
df.loc['G1']

Unnamed: 0,A,B
1,-0.497104,-0.75407
2,-0.943406,0.484752
3,-0.116773,1.901755


In [106]:
df.loc['G1'].loc[1]

A   -0.497104
B   -0.754070
Name: 1, dtype: float64

In [108]:
df.index.names = ['Groups','Num']

In [109]:
df

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B
Groups,Num,Unnamed: 2_level_1,Unnamed: 3_level_1
G1,1,-0.497104,-0.75407
G1,2,-0.943406,0.484752
G1,3,-0.116773,1.901755
G2,1,0.238127,1.996652
G2,2,-0.993263,0.1968
G2,3,-1.136645,0.000366


In [110]:
df.index.names 

FrozenList(['Groups', 'Num'])

In [115]:
# Scalar at outer label 'G2', inner label 2, column 'B', addressed with one tuple key.
df.loc[('G2', 2), 'B']

np.float64(0.19679950499134005)

In [120]:
df.xs(1,level='Num')

Unnamed: 0_level_0,A,B
Groups,Unnamed: 1_level_1,Unnamed: 2_level_1
G1,-0.497104,-0.75407
G2,0.238127,1.996652


#### Missing Data

In [121]:
import numpy as np
import pandas as pd

In [122]:
d = {'A':[1,2,np.nan],'B':[5,np.nan,np.nan],'C':[1,2,3]}

In [125]:
df = pd.DataFrame(d)
df

Unnamed: 0,A,B,C
0,1.0,5.0,1
1,2.0,,2
2,,,3


In [128]:
df.dropna() # Drop every row that contains at least one NaN (axis=0, the default, means rows)

Unnamed: 0,A,B,C
0,1.0,5.0,1


In [129]:
df.dropna(axis=1)

Unnamed: 0,C
0,1
1,2
2,3


In [131]:
df.dropna(thresh=2) # Keep only the rows that have at least two non-NaN values

Unnamed: 0,A,B,C
0,1.0,5.0,1
1,2.0,,2


In [132]:
df.fillna(value="Fill value")

Unnamed: 0,A,B,C
0,1.0,5.0,1
1,2.0,Fill value,2
2,Fill value,Fill value,3


In [134]:
df['A'].fillna(value=df['A'].mean())

0    1.0
1    2.0
2    1.5
Name: A, dtype: float64

#### GroupBy

- Group rows by the values in a column, then apply an aggregate function to each group

In [136]:
import numpy as np
import pandas as pd

In [141]:
data = {'Company':["GOOG",'GOOG',"MSFT","MSFT","FB","FB"],
        "Person":["Sam","Charlie","Amy",'Vanessa',"Carl","Sarah"],
        "Sales":[200,120,340,124,243,350]}

In [142]:
df = pd.DataFrame(data)

In [143]:
df

Unnamed: 0,Company,Person,Sales
0,GOOG,Sam,200
1,GOOG,Charlie,120
2,MSFT,Amy,340
3,MSFT,Vanessa,124
4,FB,Carl,243
5,FB,Sarah,350


In [150]:
byComp = df.groupby("Company")

In [154]:
# sum() is applied to every non-key column, so the string column Person is
# concatenated (e.g. "CarlSarah") next to the numeric Sales totals.
byComp.sum()

Unnamed: 0_level_0,Person,Sales
Company,Unnamed: 1_level_1,Unnamed: 2_level_1
FB,CarlSarah,593
GOOG,SamCharlie,320
MSFT,AmyVanessa,464


In [155]:
byComp.sum().loc['FB']

Person    CarlSarah
Sales           593
Name: FB, dtype: object

In [156]:
byComp.count()

Unnamed: 0_level_0,Person,Sales
Company,Unnamed: 1_level_1,Unnamed: 2_level_1
FB,2,2
GOOG,2,2
MSFT,2,2


In [157]:
# max() is evaluated for each column independently: Person is the
# alphabetically last name in the group, not the person with the largest sale.
byComp.max()

Unnamed: 0_level_0,Person,Sales
Company,Unnamed: 1_level_1,Unnamed: 2_level_1
FB,Sarah,350
GOOG,Sam,200
MSFT,Vanessa,340


In [160]:
df.groupby('Company').describe()

Unnamed: 0_level_0,Sales,Sales,Sales,Sales,Sales,Sales,Sales,Sales
Unnamed: 0_level_1,count,mean,std,min,25%,50%,75%,max
Company,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
FB,2.0,296.5,75.660426,243.0,269.75,296.5,323.25,350.0
GOOG,2.0,160.0,56.568542,120.0,140.0,160.0,180.0,200.0
MSFT,2.0,232.0,152.735065,124.0,178.0,232.0,286.0,340.0


#### Merging, Joining and Concatenating

In [164]:
import pandas as pd

In [170]:
df1 = pd.DataFrame({'A':['A0','A1','A2','A3'],
                    'B':['B0','B1','B2','B3'],
                    'C':['C0','C1','C2','C3'],
                    'D':['D0','D1','D2','D3']}, index=[0, 1, 2, 3])

In [168]:
df2 = pd.DataFrame({'A':['A4','A5','A6','A7'],
                    'B':['B4','B5','B6','B7'],
                    'C':['C4','C5','C6','C7'],
                    'D':['D4','D5','D6','D7']}, index=[4, 5, 6, 7])

In [169]:
df3 = pd.DataFrame({'A':['A8','A9','A10','A11'],
                    'B':['B8','B9','B10','B11'],
                    'C':['C8','C9','C10','C11'],
                    'D':['D8','D9','D10','D11']}, index=[8, 9, 10, 11])

##### Concatenation

In [172]:
pd.concat([df1,df2,df3])

Unnamed: 0,A,B,C,D
0,A0,B0,C0,D0
1,A1,B1,C1,D1
2,A2,B2,C2,D2
3,A3,B3,C3,D3
4,A4,B4,C4,D4
5,A5,B5,C5,D5
6,A6,B6,C6,D6
7,A7,B7,C7,D7
8,A8,B8,C8,D8
9,A9,B9,C9,D9


In [173]:
pd.concat([df1,df2,df3],axis=1)

Unnamed: 0,A,B,C,D,A.1,B.1,C.1,D.1,A.2,B.2,C.2,D.2
0,A0,B0,C0,D0,,,,,,,,
1,A1,B1,C1,D1,,,,,,,,
2,A2,B2,C2,D2,,,,,,,,
3,A3,B3,C3,D3,,,,,,,,
4,,,,,A4,B4,C4,D4,,,,
5,,,,,A5,B5,C5,D5,,,,
6,,,,,A6,B6,C6,D6,,,,
7,,,,,A7,B7,C7,D7,,,,
8,,,,,,,,,A8,B8,C8,D8
9,,,,,,,,,A9,B9,C9,D9


In [174]:
left = pd.DataFrame({'key':['K0','K1','K2','K3'],
                     'A':['A0','A1','A2','A3'],
                     'B':['B0','B1','B2','B3']})

In [175]:
right = pd.DataFrame({'key':['K0','K1','K2','K3'],
                     'C':['C0','C1','C2','C3'],
                    'D':['D0','D1','D2','D3']})

In [176]:
left

Unnamed: 0,key,A,B
0,K0,A0,B0
1,K1,A1,B1
2,K2,A2,B2
3,K3,A3,B3


In [177]:
right

Unnamed: 0,key,C,D
0,K0,C0,D0
1,K1,C1,D1
2,K2,C2,D2
3,K3,C3,D3


##### Merging

In [178]:
pd.merge(left, right, how='inner', on='key')

Unnamed: 0,key,A,B,C,D
0,K0,A0,B0,C0,D0
1,K1,A1,B1,C1,D1
2,K2,A2,B2,C2,D2
3,K3,A3,B3,C3,D3


In [179]:
left = pd.DataFrame({'key1':['K0','K0','K1','K2'],
                     'key2':['K0','K1','K0','K1'],
                     'A':['A0','A1','A2','A3'],
                     'B':['B0','B1','B2','B3']})
right = pd.DataFrame({'key1':['K0','K1','K1','K2'],
                      'key2':['K0','K0','K0','K0'],
                     'C':['C0','C1','C2','C3'],
                    'D':['D0','D1','D2','D3']})

In [180]:
pd.merge(left,right,on=['key1','key2'])

Unnamed: 0,key1,key2,A,B,C,D
0,K0,K0,A0,B0,C0,D0
1,K1,K0,A2,B2,C1,D1
2,K1,K0,A2,B2,C2,D2


In [181]:
pd.merge(left,right,how='outer',on=['key1','key2'])

Unnamed: 0,key1,key2,A,B,C,D
0,K0,K0,A0,B0,C0,D0
1,K0,K1,A1,B1,,
2,K1,K0,A2,B2,C1,D1
3,K1,K0,A2,B2,C2,D2
4,K2,K0,,,C3,D3
5,K2,K1,A3,B3,,


In [182]:
pd.merge(left,right,how='left',on=['key1','key2'])

Unnamed: 0,key1,key2,A,B,C,D
0,K0,K0,A0,B0,C0,D0
1,K0,K1,A1,B1,,
2,K1,K0,A2,B2,C1,D1
3,K1,K0,A2,B2,C2,D2
4,K2,K1,A3,B3,,


In [184]:
pd.merge(left,right,how='right',on=['key1','key2'])

Unnamed: 0,key1,key2,A,B,C,D
0,K0,K0,A0,B0,C0,D0
1,K1,K0,A2,B2,C1,D1
2,K1,K0,A2,B2,C2,D2
3,K2,K0,,,C3,D3


##### Joining

In [186]:
# Frames whose keys live in the index rather than in a 'key' column.
left = pd.DataFrame({
                     'A':['A0','A1','A2','A3'],
                     'B':['B0','B1','B2','B3']}, index=['K0','K1','K2','K3'])
# NOTE(review): 'K2' appears twice and 'K1' is absent in this index, so a join
# repeats left's K2 row once per match and leaves C/D empty for K1.
# Confirm the duplicate label is intentional.
right = pd.DataFrame({
                     'C':['C0','C1','C2','C3'],
                    'D':['D0','D1','D2','D3']},index=['K0','K2','K2','K3'])

In [187]:
left.join(right)

Unnamed: 0,A,B,C,D
K0,A0,B0,C0,D0
K1,A1,B1,,
K2,A2,B2,C1,D1
K2,A2,B2,C2,D2
K3,A3,B3,C3,D3


In [188]:
# Every label in right's index also exists in left's, so for these two frames
# the outer join returns the same rows as the default (left) join.
left.join(right, how='outer')

Unnamed: 0,A,B,C,D
K0,A0,B0,C0,D0
K1,A1,B1,,
K2,A2,B2,C1,D1
K2,A2,B2,C2,D2
K3,A3,B3,C3,D3


#### Operations

In [209]:
df = pd.DataFrame({'col1':[1,2,3,4],
                   'col2':[444,55,666,444],
                   'col3':['abc','def','ghi','xyz']})

In [190]:
df.head()

Unnamed: 0,col1,col2,col3
0,1,444,abc
1,2,55,def
2,3,666,ghi
3,4,444,xyz


In [191]:
df.tail()

Unnamed: 0,col1,col2,col3
0,1,444,abc
1,2,55,def
2,3,666,ghi
3,4,444,xyz


In [194]:
df['col2'].unique()

array([444,  55, 666])

In [195]:
df['col2'].nunique()

3

In [196]:
df['col2'].value_counts()

col2
444    2
55     1
666    1
Name: count, dtype: int64

In [199]:
# NOTE(review): col2 == 444 already implies col2 > 2, so the first condition
# filters nothing here. Possibly col1 was intended; confirm.
df[(df['col2']>2) & (df['col2']==444)]

Unnamed: 0,col1,col2,col3
0,1,444,abc
3,4,444,xyz


In [200]:
def times2(x):
    """Return ``x`` multiplied by 2, using ``x``'s own ``*`` operator."""
    doubled = x * 2
    return doubled

In [201]:
df['col1'].sum()

np.int64(10)

In [202]:
df['col1'].apply(times2)

0    2
1    4
2    6
3    8
Name: col1, dtype: int64

In [203]:
df['col3'].apply(len)

0    3
1    3
2    3
3    3
Name: col3, dtype: int64

In [204]:
df['col2'].apply(lambda x : x*2)

0     888
1     110
2    1332
3     888
Name: col2, dtype: int64

In [None]:
# Show the frame without col1, but do not mutate df. Later cells still read
# col1 (df.columns, the pivot example), so an in-place drop would break them
# on a fresh top-to-bottom run. Without inplace, drop() returns the reduced
# copy, which is what this cell displays.
df.drop('col1',axis=1)

Unnamed: 0,col2,col3
0,444,abc
1,55,def
2,666,ghi
3,444,xyz


In [211]:
df

Unnamed: 0,col1,col2,col3
0,1,444,abc
1,2,55,def
2,3,666,ghi
3,4,444,xyz


In [212]:
df.columns

Index(['col1', 'col2', 'col3'], dtype='object')

In [213]:
df.index

RangeIndex(start=0, stop=4, step=1)

In [220]:
df.sort_values('col2')

Unnamed: 0,col1,col2,col3
1,2,55,def
0,1,444,abc
3,4,444,xyz
2,3,666,ghi


In [221]:
df.sort_values('col2',ascending=False)

Unnamed: 0,col1,col2,col3
2,3,666,ghi
0,1,444,abc
3,4,444,xyz
1,2,55,def


In [222]:
df.isnull()

Unnamed: 0,col1,col2,col3
0,False,False,False
1,False,False,False
2,False,False,False
3,False,False,False


In [None]:
# col3 holds strings, which the default aggfunc ('mean') cannot aggregate, so
# the call fails as originally written. 'first' simply places each col3 value
# at its (col2, col1) position; combinations that never occur show up as NaN.
df.pivot_table(values='col3', index=['col2'],columns=['col1'], aggfunc='first')

#### Data Input and Output

- CSV
- Excel
- HTML
- SQL

In [224]:
import pandas as pd

In [227]:
# Only a five-row preview is displayed, so have the parser stop after five
# rows (nrows=5) instead of loading the whole file and discarding the rest.
pd.read_csv('../Python_for_ML/input/winemag-data-130k-v2.csv', nrows=5)

Unnamed: 0.1,Unnamed: 0,country,description,designation,points,price,province,region_1,region_2,taster_name,taster_twitter_handle,title,variety,winery
0,0,Italy,"Aromas include tropical fruit, broom, brimston...",Vulkà Bianco,87,,Sicily & Sardinia,Etna,,Kerin O’Keefe,@kerinokeefe,Nicosia 2013 Vulkà Bianco (Etna),White Blend,Nicosia
1,1,Portugal,"This is ripe and fruity, a wine that is smooth...",Avidagos,87,15.0,Douro,,,Roger Voss,@vossroger,Quinta dos Avidagos 2011 Avidagos Red (Douro),Portuguese Red,Quinta dos Avidagos
2,2,US,"Tart and snappy, the flavors of lime flesh and...",,87,14.0,Oregon,Willamette Valley,Willamette Valley,Paul Gregutt,@paulgwine,Rainstorm 2013 Pinot Gris (Willamette Valley),Pinot Gris,Rainstorm
3,3,US,"Pineapple rind, lemon pith and orange blossom ...",Reserve Late Harvest,87,13.0,Michigan,Lake Michigan Shore,,Alexander Peartree,,St. Julian 2013 Reserve Late Harvest Riesling ...,Riesling,St. Julian
4,4,US,"Much like the regular bottling from 2012, this...",Vintner's Reserve Wild Child Block,87,65.0,Oregon,Willamette Valley,Willamette Valley,Paul Gregutt,@paulgwine,Sweet Cheeks 2012 Vintner's Reserve Wild Child...,Pinot Noir,Sweet Cheeks


In [228]:
df.to_csv('My_output',index=False)

In [229]:
pd.read_csv('My_output')

Unnamed: 0,col1,col2,col3
0,1,444,abc
1,2,55,def
2,3,666,ghi
3,4,444,xyz
