# DataFrame

In [0]:
import pandas as pd
import numpy as np

## Series(1次元のリスト)の作成

In [0]:
# Series of numbers; because of the np.nan entry the values are stored as float64
s = pd.Series([1, 3, 5, np.nan, 6, 8])
s

0    1.0
1    3.0
2    5.0
3    NaN
4    6.0
5    8.0
dtype: float64

In [0]:
# Six consecutive daily dates starting at 2013-01-01.
# Note: pd.date_range returns a DatetimeIndex, not a Series.
dates = pd.date_range('20130101', periods=6)
dates

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')

## データフレーム作成

In [0]:
# Build a 4-row frame from mixed inputs: the scalar values (A, B, F) are
# broadcast to the length of the array-like columns (C, D, E).
df = pd.DataFrame(dict(
    A=1,
    B=pd.Timestamp('20130102'),
    C=pd.Series(1, index=[0, 1, 2, 3], dtype='float32'),
    D=np.array([3, 3, 3, 3], dtype='int32'),
    E=pd.Categorical(["test", "train", "test", "train"]),
    F='foo',
))
df

Unnamed: 0,A,B,C,D,E,F
0,1,2013-01-02,1.0,3,test,foo
1,1,2013-01-02,1.0,3,train,foo
2,1,2013-01-02,1.0,3,test,foo
3,1,2013-01-02,1.0,3,train,foo


### Numpyの行列から作成

In [0]:
matrix = np.random.randn(6, 4)
matrix

array([[ 0.27751728, -1.08777962, -0.70211239,  1.15946615],
       [-0.04613276, -1.79832864,  0.8094628 , -0.09072435],
       [ 0.19310652, -1.76261838,  0.11054288, -1.08022388],
       [-1.87732002, -0.49153633, -0.52812458,  1.58980174],
       [ 0.43376055,  0.04443699, -0.4622891 ,  1.60760091],
       [ 0.51213095,  0.35621362, -0.15148458, -0.81389829]])

In [0]:
df2 = pd.DataFrame(matrix, columns=list('ABCD'))
df2

Unnamed: 0,A,B,C,D
0,0.277517,-1.08778,-0.702112,1.159466
1,-0.046133,-1.798329,0.809463,-0.090724
2,0.193107,-1.762618,0.110543,-1.080224
3,-1.87732,-0.491536,-0.528125,1.589802
4,0.433761,0.044437,-0.462289,1.607601
5,0.512131,0.356214,-0.151485,-0.813898


### ディクショナリから作成

In [0]:
a_values = [1, 2, 3, 4, 5]
b_values = np.random.rand(5)
c_values = ["apple", "banana", "strawberry", "peach", "orange"]
my_dict = { "A" : a_values, "B" : b_values, "C" : c_values }
my_dict

{'A': [1, 2, 3, 4, 5],
 'B': array([0.59338483, 0.15339508, 0.49882545, 0.59323485, 0.57322696]),
 'C': ['apple', 'banana', 'strawberry', 'peach', 'orange']}

In [0]:
my_df = pd.DataFrame.from_dict(my_dict)
my_df

Unnamed: 0,A,B,C
0,1,0.593385,apple
1,2,0.153395,banana
2,3,0.498825,strawberry
3,4,0.593235,peach
4,5,0.573227,orange


## データフレームの確認

In [0]:
df = pd.DataFrame(np.random.randn(6, 4), columns=list('ABCD'))
df

Unnamed: 0,A,B,C,D
0,0.239821,-0.533608,-1.278146,0.572269
1,-0.914126,-0.328203,0.61819,-0.5307
2,-0.377864,-1.175781,-1.586262,-0.00843
3,-0.491579,0.878995,-0.998046,-1.744889
4,1.24161,-1.301067,0.200972,0.889438
5,-0.506548,0.344752,0.60724,-0.792396


### 先頭N行を表示

In [0]:
df.head(3)

Unnamed: 0,A,B,C,D
0,0.239821,-0.533608,-1.278146,0.572269
1,-0.914126,-0.328203,0.61819,-0.5307
2,-0.377864,-1.175781,-1.586262,-0.00843


In [0]:
df.head()

Unnamed: 0,A,B,C,D
0,0.239821,-0.533608,-1.278146,0.572269
1,-0.914126,-0.328203,0.61819,-0.5307
2,-0.377864,-1.175781,-1.586262,-0.00843
3,-0.491579,0.878995,-0.998046,-1.744889
4,1.24161,-1.301067,0.200972,0.889438


### 末尾N行を表示

In [0]:
df.tail(2)

Unnamed: 0,A,B,C,D
4,1.24161,-1.301067,0.200972,0.889438
5,-0.506548,0.344752,0.60724,-0.792396


In [0]:
df.tail()

Unnamed: 0,A,B,C,D
1,-0.914126,-0.328203,0.61819,-0.5307
2,-0.377864,-1.175781,-1.586262,-0.00843
3,-0.491579,0.878995,-0.998046,-1.744889
4,1.24161,-1.301067,0.200972,0.889438
5,-0.506548,0.344752,0.60724,-0.792396


### 基本統計量を算出

In [0]:
df.describe()

Unnamed: 0,A,B,C,D
count,6.0,6.0,6.0,6.0
mean,-0.134781,-0.352485,-0.406009,-0.269118
std,0.770294,0.850141,0.994808,0.96315
min,-0.914126,-1.301067,-1.586262,-1.744889
25%,-0.502806,-1.015238,-1.208121,-0.726972
50%,-0.434722,-0.430906,-0.398537,-0.269565
75%,0.085399,0.176513,0.505673,0.427095
max,1.24161,0.878995,0.61819,0.889438


### 各列の型を確認

In [0]:
# One column per dtype so that df2.dtypes has something different to report
# for each: integer, datetime, float32, int32, category and object (string).
df2 = pd.DataFrame(dict(
    A=1,
    B=pd.Timestamp('20130102'),
    C=pd.Series(1, index=[0, 1, 2, 3], dtype='float32'),
    D=np.array([3, 3, 3, 3], dtype='int32'),
    E=pd.Categorical(["test", "train", "test", "train"]),
    F='foo',
))
df2

Unnamed: 0,A,B,C,D,E,F
0,1,2013-01-02,1.0,3,test,foo
1,1,2013-01-02,1.0,3,train,foo
2,1,2013-01-02,1.0,3,test,foo
3,1,2013-01-02,1.0,3,train,foo


In [0]:
df2.dtypes

A             int64
B    datetime64[ns]
C           float32
D             int32
E          category
F            object
dtype: object

### 行名(index)、列名(columns)の表示

In [0]:
dates = pd.date_range('2019-06-20', periods=6)
df3 = pd.DataFrame(np.random.randn(6, 4), index = dates, columns = list('ABCD'))
df3

Unnamed: 0,A,B,C,D
2019-06-20,0.701459,0.934766,1.348135,0.808644
2019-06-21,-1.165626,-0.772589,1.367118,-1.332559
2019-06-22,-1.548485,-0.087012,1.676307,-0.277688
2019-06-23,-0.589377,1.355439,0.061758,-1.279669
2019-06-24,-1.955301,0.098719,0.835785,-1.959956
2019-06-25,-0.200656,-1.075562,-0.703483,1.026003


In [0]:
df3.columns

Index(['A', 'B', 'C', 'D'], dtype='object')

In [0]:
df3.index

DatetimeIndex(['2019-06-20', '2019-06-21', '2019-06-22', '2019-06-23',
               '2019-06-24', '2019-06-25'],
              dtype='datetime64[ns]', freq='D')

### 値のみ表示

In [0]:
df3.values

array([[ 0.70145876,  0.93476592,  1.34813545,  0.80864421],
       [-1.16562585, -0.77258875,  1.36711762, -1.33255948],
       [-1.54848465, -0.0870115 ,  1.67630669, -0.27768806],
       [-0.58937673,  1.35543896,  0.06175801, -1.27966949],
       [-1.95530124,  0.09871926,  0.83578472, -1.95995599],
       [-0.20065628, -1.07556243, -0.70348304,  1.02600346]])

## データフレームから特定行・列の取得

In [0]:
# "A"列を抽出
df3['A']

2019-06-20    0.701459
2019-06-21   -1.165626
2019-06-22   -1.548485
2019-06-23   -0.589377
2019-06-24   -1.955301
2019-06-25   -0.200656
Freq: D, Name: A, dtype: float64

In [0]:
# Attribute-style access returns the same column as df3['A']
df3.A

2019-06-20    0.701459
2019-06-21   -1.165626
2019-06-22   -1.548485
2019-06-23   -0.589377
2019-06-24   -1.955301
2019-06-25   -0.200656
Freq: D, Name: A, dtype: float64

### 特定区間の行抽出

In [0]:
# Positional slice: rows at positions 1 and 2 (zero-based; the stop position 3 is excluded)
df3[1:3]

Unnamed: 0,A,B,C,D
2019-06-21,-1.165626,-0.772589,1.367118,-1.332559
2019-06-22,-1.548485,-0.087012,1.676307,-0.277688


In [0]:
# First three rows (positions 0, 1 and 2)
df3[:3]

Unnamed: 0,A,B,C,D
2019-06-20,0.701459,0.934766,1.348135,0.808644
2019-06-21,-1.165626,-0.772589,1.367118,-1.332559
2019-06-22,-1.548485,-0.087012,1.676307,-0.277688


In [0]:
# Label-based slice on the date index: unlike the positional slices above,
# both endpoints (2019-06-20 and 2019-06-22) are included in the result
df3['2019-06-20' : '2019-06-22']

Unnamed: 0,A,B,C,D
2019-06-20,0.701459,0.934766,1.348135,0.808644
2019-06-21,-1.165626,-0.772589,1.367118,-1.332559
2019-06-22,-1.548485,-0.087012,1.676307,-0.277688


### locアトリビュートを使用して特定行・列抽出

In [0]:
df3.loc['2019-06-20']

A    0.701459
B    0.934766
C    1.348135
D    0.808644
Name: 2019-06-20 00:00:00, dtype: float64

In [0]:
df3.loc[:, ['A', 'B']]

Unnamed: 0,A,B
2019-06-20,0.701459,0.934766
2019-06-21,-1.165626,-0.772589
2019-06-22,-1.548485,-0.087012
2019-06-23,-0.589377,1.355439
2019-06-24,-1.955301,0.098719
2019-06-25,-0.200656,-1.075562


In [0]:
df3.loc['20190620' : '20190623', ['A', 'B']]

Unnamed: 0,A,B
2019-06-20,0.701459,0.934766
2019-06-21,-1.165626,-0.772589
2019-06-22,-1.548485,-0.087012
2019-06-23,-0.589377,1.355439


###  行や列の位置を指定して行・列取得する

In [0]:
# Row at integer position 3 (zero-based, i.e. the fourth row: 2019-06-23)
df3.iloc[3]

A   -0.589377
B    1.355439
C    0.061758
D   -1.279669
Name: 2019-06-23 00:00:00, dtype: float64

In [0]:
# Rows at positions 1, 2 and 4, and columns at positions 0 and 2 ('A' and 'C')
df3.iloc[[1, 2, 4], [0, 2]]

Unnamed: 0,A,C
2019-06-21,-1.165626,1.367118
2019-06-22,-1.548485,1.676307
2019-06-24,-1.955301,0.835785


In [0]:
# Rows at positions 1 and 2 (the stop position 3 is excluded), all columns
df3.iloc[1:3, :]

Unnamed: 0,A,B,C,D
2019-06-21,-1.165626,-0.772589,1.367118,-1.332559
2019-06-22,-1.548485,-0.087012,1.676307,-0.277688


In [0]:
# Single value at row position 1, column position 1 (zero-based: second row, column 'B')
df3.iloc[1, 1]

-0.772588752506288

###  条件を指定して行・列を取得する

In [0]:
# Keep only the rows whose value in column A is greater than 0
df3[df3.A > 0]

Unnamed: 0,A,B,C,D
2019-06-20,0.701459,0.934766,1.348135,0.808644


In [0]:
# Element-wise mask: values greater than 0 are kept, every other cell becomes NaN
df[df > 0]

Unnamed: 0,A,B,C,D
0,0.239821,,,0.572269
1,,,0.61819,
2,,,,
3,,0.878995,,
4,1.24161,,0.200972,0.889438
5,,0.344752,0.60724,


## データフレームに行や列を追加する

###  行を追加する

In [0]:
df = pd.DataFrame([[1, 2], [3, 4]], columns = list('AB'))
df

Unnamed: 0,A,B
0,1,2
1,3,4


In [0]:
df2 = pd.DataFrame([[5, 6]], columns = list('AB'))
df2

Unnamed: 0,A,B
0,5,6


In [0]:
# DataFrame.append was removed in pandas 2.0; pd.concat is its replacement.
# The original row labels are kept, so label 0 appears twice in the result.
pd.concat([df, df2])

Unnamed: 0,A,B
0,1,2
1,3,4
0,5,6


In [0]:
# ignore_index=True discards the original row labels and renumbers the result 0..n-1.
# (pd.concat replaces DataFrame.append, which was removed in pandas 2.0.)
pd.concat([df, df2], ignore_index=True)

Unnamed: 0,A,B
0,1,2
1,3,4
2,5,6


### 列を追加する

In [0]:
df = pd.DataFrame([["0001", "John"], ["0002", "Lily"]], columns=['id', 'name'])
df

Unnamed: 0,id,name
0,1,John
1,2,Lily


In [0]:
df['job'] = ["Enginner", "Sales"]
df

Unnamed: 0,id,name,job
0,1,John,Enginner
1,2,Lily,Sales


In [0]:
df['age'] = np.array([35, 25])
df

Unnamed: 0,id,name,job,age
0,1,John,Enginner,35
1,2,Lily,Sales,25


## 行を削除する

In [0]:
df

Unnamed: 0,id,name,job,age
0,1,John,Enginner,35
1,2,Lily,Sales,25


In [0]:
# Fresh 6x4 frame of standard-normal samples for the drop examples.
# (A pasted ">>>" REPL prompt was removed from the display line; it is a
# SyntaxError inside a notebook cell.)
df = pd.DataFrame(np.random.randn(6, 4), columns=list('ABCD'))
df

Unnamed: 0,A,B,C,D
0,-0.209766,0.439074,-0.701925,-1.380869
1,-1.555862,-1.60978,-0.418562,-0.483262
2,-0.848395,0.210524,-1.627906,-0.015108
3,0.367913,1.53586,1.016379,0.372341
4,0.346825,0.710988,0.632432,0.497509
5,1.401447,-0.270169,-0.767895,-0.876912


In [0]:
df.drop(5)

Unnamed: 0,A,B,C,D
0,-0.209766,0.439074,-0.701925,-1.380869
1,-1.555862,-1.60978,-0.418562,-0.483262
2,-0.848395,0.210524,-1.627906,-0.015108
3,0.367913,1.53586,1.016379,0.372341
4,0.346825,0.710988,0.632432,0.497509


In [0]:
df.drop([3, 4])

Unnamed: 0,A,B,C,D
0,-0.209766,0.439074,-0.701925,-1.380869
1,-1.555862,-1.60978,-0.418562,-0.483262
2,-0.848395,0.210524,-1.627906,-0.015108
5,1.401447,-0.270169,-0.767895,-0.876912


### 特定列を削除する

In [0]:
df.drop("A", axis=1)

Unnamed: 0,B,C,D
0,0.439074,-0.701925,-1.380869
1,-1.60978,-0.418562,-0.483262
2,0.210524,-1.627906,-0.015108
3,1.53586,1.016379,0.372341
4,0.710988,0.632432,0.497509
5,-0.270169,-0.767895,-0.876912


In [0]:
# Shorter alternative: `del` removes column 'B' from df itself (in place),
# whereas the df.drop calls above returned new frames and left df unchanged
del df['B']
df

Unnamed: 0,A,C,D
0,-0.209766,-0.701925,-1.380869
1,-1.555862,-0.418562,-0.483262
2,-0.848395,-1.627906,-0.015108
3,0.367913,1.016379,0.372341
4,0.346825,0.632432,0.497509
5,1.401447,-0.767895,-0.876912


## データフレームの行・列の長さを確認

In [0]:
df = pd.DataFrame(np.random.randn(6, 4), columns=list('ABCD'))
df

Unnamed: 0,A,B,C,D
0,-0.151266,1.622869,2.422348,-1.001805
1,1.555414,-0.852879,0.324966,0.027044
2,1.489366,1.514213,0.286235,0.117792
3,-0.232544,-0.537574,1.71195,-0.055142
4,-1.324319,0.347519,0.725988,-2.370252
5,0.600357,0.51734,-0.682841,-0.226218


In [0]:
len(df.index)

6

In [0]:
len(df.columns)

4

In [0]:
df.shape

(6, 4)

## データフレームの行・列を入れ替える

In [0]:
df

Unnamed: 0,A,B,C,D
0,-0.151266,1.622869,2.422348,-1.001805
1,1.555414,-0.852879,0.324966,0.027044
2,1.489366,1.514213,0.286235,0.117792
3,-0.232544,-0.537574,1.71195,-0.055142
4,-1.324319,0.347519,0.725988,-2.370252
5,0.600357,0.51734,-0.682841,-0.226218


In [0]:
# dfを転置
df.T

Unnamed: 0,0,1,2,3,4,5
A,-0.151266,1.555414,1.489366,-0.232544,-1.324319,0.600357
B,1.622869,-0.852879,1.514213,-0.537574,0.347519,0.51734
C,2.422348,0.324966,0.286235,1.71195,0.725988,-0.682841
D,-1.001805,0.027044,0.117792,-0.055142,-2.370252,-0.226218


## データフレームをソートする

In [0]:
dates = pd.date_range('20130101', periods=6)
df = pd.DataFrame(np.random.randn(6, 4), index=dates, columns=list('ABCD'))
df

Unnamed: 0,A,B,C,D
2013-01-01,-0.272755,-1.000416,-1.139808,-1.173016
2013-01-02,1.492231,0.372299,0.266907,0.108464
2013-01-03,0.005945,-0.744854,-0.816883,0.722975
2013-01-04,-1.006583,-0.068641,0.743799,1.222963
2013-01-05,0.089081,0.337843,0.668942,-0.995733
2013-01-06,0.802875,-0.666533,0.636733,-0.214056


### インデックスに基づいてソートする

In [0]:
# 行名に基づいてソート
df.sort_index(ascending=False)

Unnamed: 0,A,B,C,D
2013-01-06,0.802875,-0.666533,0.636733,-0.214056
2013-01-05,0.089081,0.337843,0.668942,-0.995733
2013-01-04,-1.006583,-0.068641,0.743799,1.222963
2013-01-03,0.005945,-0.744854,-0.816883,0.722975
2013-01-02,1.492231,0.372299,0.266907,0.108464
2013-01-01,-0.272755,-1.000416,-1.139808,-1.173016


In [0]:
# カラム名に基づいてソートする
df.sort_index(axis=1, ascending=False)

Unnamed: 0,D,C,B,A
2013-01-01,-1.173016,-1.139808,-1.000416,-0.272755
2013-01-02,0.108464,0.266907,0.372299,1.492231
2013-01-03,0.722975,-0.816883,-0.744854,0.005945
2013-01-04,1.222963,0.743799,-0.068641,-1.006583
2013-01-05,-0.995733,0.668942,0.337843,0.089081
2013-01-06,-0.214056,0.636733,-0.666533,0.802875


### 値に基づいてソートする

In [0]:
df.sort_values(by='B')

Unnamed: 0,A,B,C,D
2013-01-01,-0.272755,-1.000416,-1.139808,-1.173016
2013-01-03,0.005945,-0.744854,-0.816883,0.722975
2013-01-06,0.802875,-0.666533,0.636733,-0.214056
2013-01-04,-1.006583,-0.068641,0.743799,1.222963
2013-01-05,0.089081,0.337843,0.668942,-0.995733
2013-01-02,1.492231,0.372299,0.266907,0.108464


In [0]:
df.sort_values(by='C', ascending=False)

Unnamed: 0,A,B,C,D
2013-01-04,-1.006583,-0.068641,0.743799,1.222963
2013-01-05,0.089081,0.337843,0.668942,-0.995733
2013-01-06,0.802875,-0.666533,0.636733,-0.214056
2013-01-02,1.492231,0.372299,0.266907,0.108464
2013-01-03,0.005945,-0.744854,-0.816883,0.722975
2013-01-01,-0.272755,-1.000416,-1.139808,-1.173016


## データフレームの結合(マージ, JOIN)

`merge(left, right, how='inner', on=None, left_on=None, right_on=None,
      left_index=False, right_index=False, sort=False,
      suffixes=('_x', '_y'), copy=True, indicator=False)`
      
    

---
|引数|内容|
|:---|--:|
|left|	データフレーム (DataFrame) オブジェクト。|
|right	|もう一つのデータフレーム (DataFrame) オブジェクト。|
|on	|結合に用いる列の名前。left と right のデータフレーム両方に存在する必要あり。値が設定されていなく、かつ left_index と right_index も設定されていない場合は、結合に用いるキーを推測により選択。|
|left_on|	left のデータフレームでキーとして用いる列名、または配列。|
|right_on	|right のデータフレームでキーとして用いる列名、または配列を選択。|
|left_index	|True を設定すると、left のデータフレームの行ラベルを結合のキーとして用いる。MultiIndex (階層的なインデックス構造) を持つデータフレームの場合、階層数を left と right で合わせる必要があり。|
|right_index	|left_index と同じ。|
|how|	‘left’, ‘right’, ‘outer’, ‘inner’ のいずれかを設定。 (デフォルトは “inner”)|
|sort	|True を設定すると、結合後のデータフレームを結合キーでソート。(デフォルトは False)|
|suffixes|	同一のカラム名が存在した場合に、後ろに文字列を追加して区別。 (デフォルトは ‘_x’, ‘_y’)|
|copy	|常に与えられたデータフレームをコピーします。場合によっては、False に設定すると、パフォーマンスやメモリの使用量を向上できる場合があり。 (デフォルトは True)|
|indicator|	_merge という名前のカラムを出力後のデータフレームに追加し、結合前の行に関する情報を格納。|

### 内部結合

In [0]:
customer = pd.DataFrame([["0001", "John"], ["0002", "Lily"]], columns=['customer_id', 'name'])
customer

Unnamed: 0,customer_id,name
0,1,John
1,2,Lily


In [0]:
order = pd.DataFrame([["0001", "Smartphone"],
                          ["0001", "Wireless Charger"],
                          ["0002", "Wearable watch"]],
                          columns=['customer_id', 'product_name'])
order

Unnamed: 0,customer_id,product_name
0,1,Smartphone
1,1,Wireless Charger
2,2,Wearable watch


In [0]:
pd.merge(customer, order, how="inner", on="customer_id")

Unnamed: 0,customer_id,name,product_name
0,1,John,Smartphone
1,1,John,Wireless Charger
2,2,Lily,Wearable watch


In [0]:
# 結合に用いるキーが異なる場合
employee = pd.DataFrame([["Miki", "Tokyo"],["Ichiro", "Osaka"]],
                            columns=['employee_name', 'office_name'])
employee

Unnamed: 0,employee_name,office_name
0,Miki,Tokyo
1,Ichiro,Osaka


In [0]:
office = pd.DataFrame([["Tokyo", "1-2-3 Chiyoda-ku Tokyo"],
                           ["Osaka", "3-2-1 Chuo-ku Osaka"]],
                          columns=['name', 'address'])
office

Unnamed: 0,name,address
0,Tokyo,1-2-3 Chiyoda-ku Tokyo
1,Osaka,3-2-1 Chuo-ku Osaka


In [0]:
pd.merge(employee, office, how="inner", left_on="office_name", right_on="name")

Unnamed: 0,employee_name,office_name,name,address
0,Miki,Tokyo,Tokyo,1-2-3 Chiyoda-ku Tokyo
1,Ichiro,Osaka,Osaka,3-2-1 Chuo-ku Osaka


### 外部結合

In [0]:
products = pd.DataFrame([["P-001", "Orange"],
                             ["P-002", "Apple"],
                             ["P-003", "Blueberry"]],
                            columns=['product_id', 'name'])
products

Unnamed: 0,product_id,name
0,P-001,Orange
1,P-002,Apple
2,P-003,Blueberry


In [0]:
stock = pd.DataFrame([["P-001", 10],
                          ["P-002", 20],
                          ["P-010", 30]],
                         columns=['product_id', 'amount'])
stock

Unnamed: 0,product_id,amount
0,P-001,10
1,P-002,20
2,P-010,30


In [0]:
#  左外部結合
pd.merge(products, stock, how="left", on="product_id")

Unnamed: 0,product_id,name,amount
0,P-001,Orange,10.0
1,P-002,Apple,20.0
2,P-003,Blueberry,


In [0]:
# 右外部結合
pd.merge(products, stock, how="right", on="product_id")

Unnamed: 0,product_id,name,amount
0,P-001,Orange,10
1,P-002,Apple,20
2,P-010,,30


In [0]:
# 完全結合
pd.merge(products, stock, how="outer", on="product_id")

Unnamed: 0,product_id,name,amount
0,P-001,Orange,10.0
1,P-002,Apple,20.0
2,P-003,Blueberry,
3,P-010,,30.0


### concatによる結合

In [0]:
df1 = pd.DataFrame([["0001", "John"],
                        ["0002", "Lily"]],
                       columns=['id', 'name'])
df1

Unnamed: 0,id,name
0,1,John
1,2,Lily


In [0]:
df2 = pd.DataFrame([["0003", "Chris"],
                        ["0004", "Jessica"]],
                       columns=['id', 'name'])
df2

Unnamed: 0,id,name
0,3,Chris
1,4,Jessica


In [0]:
pd.concat([df1, df2], ignore_index=True)

Unnamed: 0,id,name
0,1,John
1,2,Lily
2,3,Chris
3,4,Jessica


## PandasでCSVファイルやテキストファイルを読み込む  
CSVファイルのロード：read_csv()  
テキストファイルのロード：read_table()

In [12]:
# Google Colab only: this import is not available in a plain Jupyter kernel.
# files.upload() returns a dict mapping each uploaded file name to its raw bytes.
from google.colab import files
uploaded = files.upload()

Saving sample_dataset.csv to sample_dataset (1).csv


In [13]:
uploaded

{'sample_dataset.csv': b'ID,Name,Birthdate,Sex,Occupation,Salary\r\nID-0001,Abe,1985/1/1,M,Engineer,8422213\r\nID-0002,Saito,1970/2/11,F,Professor,8222588\r\nID-0003,Yamada,1975/3/21,M,Doctor,9845288\r\nID-0004,Tanaka,1980/4/22,F,Sales,8505218\r\nID-0005,Okamoto,1995/5/25,M,Student,218103\r\n'}

In [14]:
import io

# Decode the uploaded bytes as UTF-8 and parse them as CSV.
# header=None tells pandas that the file has no header row, so the columns are
# numbered 0..5 and the "ID,Name,..." line stays in the data as row 0.
# (The previous header=-1 raises ValueError in current pandas versions.)
data = pd.read_csv(io.StringIO(uploaded['sample_dataset.csv'].decode('utf-8')), header=None)
data.head()

Unnamed: 0,0,1,2,3,4,5
0,ID,Name,Birthdate,Sex,Occupation,Salary
1,ID-0001,Abe,1985/1/1,M,Engineer,8422213
2,ID-0002,Saito,1970/2/11,F,Professor,8222588
3,ID-0003,Yamada,1975/3/21,M,Doctor,9845288
4,ID-0004,Tanaka,1980/4/22,F,Sales,8505218


### URL指定によるファイル読み込み

In [0]:
dataset2 = pd.read_csv("http://pythondatascience.plavox.info/wp-content/uploads/2016/05/sample_dataset.csv")
dataset2

Unnamed: 0,ID,Name,Birthdate,Sex,Occupation,Salary
0,ID-0001,Abe,1985/1/1,M,Engineer,8422213
1,ID-0002,Saito,1970/2/11,F,Professor,8222588
2,ID-0003,Yamada,1975/3/21,M,Doctor,9845288
3,ID-0004,Tanaka,1980/4/22,F,Sales,8505218
4,ID-0005,Okamoto,1995/5/25,M,Student,218103


## データフレームをCSVファイルやテキストファイルに出力する

CSVファイルとして出力するメソッドとして、`DataFrame.to_csv()`がある。また、区切り文字をCSVファイルで用いるカンマ(,)からタブ(\t)などへ置き換えることで、テキストファイルとして出力することができる。