# 1 Series

In [1]:
from pathlib import Path

import numpy as np
import pandas as pd
# Seed NumPy's legacy global RNG so the np.random.* draws in later cells are repeatable.
np.random.seed(1)

## 1.1 Seriesの特徴

In [2]:
# Build two Series: one from a NumPy integer array, one from a mixed-type list.
sample_arr = np.random.randint(low=1, high=10, size=4)
sample_list = [1, 2, "a", "b"]
sample_series1, sample_series2 = (
    pd.Series(source) for source in (sample_arr, sample_list)
)

In [3]:
# Show the integer Series, then its Python type, then its element dtype.
for shown in (sample_series1, type(sample_series1), sample_series1.dtype):
    print(shown)

0    6
1    9
2    6
3    1
dtype: int64
<class 'pandas.core.series.Series'>
int64


In [4]:
# Print a float-cast copy; the result is not assigned back to sample_series1.
print(sample_series1.astype(float))

0    6.0
1    9.0
2    6.0
3    1.0
dtype: float64


In [5]:
# Fetch the element at key 1 and show the type of the scalar that comes back.
print(sample_series1[1], type(sample_series1[1]))

9 <class 'numpy.int64'>


In [6]:
# Show the mixed-type Series, then its Python type, then its element dtype.
for shown in (sample_series2, type(sample_series2), sample_series2.dtype):
    print(shown)

0    1
1    2
2    a
3    b
dtype: object
<class 'pandas.core.series.Series'>
object


In [7]:
# Print two elements of the mixed-type Series together with their Python types.
for key in (1, 3):
    element = sample_series2[key]
    print(element, type(element))

2 <class 'int'>
b <class 'str'>


## 1.2 Seriesの構成要素

In [8]:
# Rebuild the mixed-type Series from scratch for this section and display it.
sample_list = [1, 2, "a", "b"]
sample_series2 = pd.Series(data=sample_list)
print(sample_series2)

0    1
1    2
2    a
3    b
dtype: object


In [9]:
# Inspect the index component of the Series and the type of that index object.
print("index")
print(sample_series2, "\n")
series_index = sample_series2.index
print(series_index, type(series_index))

index
0    1
1    2
2    a
3    b
dtype: object 

RangeIndex(start=0, stop=4, step=1) <class 'pandas.core.indexes.range.RangeIndex'>


In [10]:
# Replace the index with string labels in place, then inspect it again.
sample_series2.index = list("abcd")
print(sample_series2, "\n")
relabelled_index = sample_series2.index
print(relabelled_index, type(relabelled_index))

a    1
b    2
c    a
d    b
dtype: object 

Index(['a', 'b', 'c', 'd'], dtype='object') <class 'pandas.core.indexes.base.Index'>


In [11]:
# Inspect the values component: the backing array and two of its elements.
print("value")
print(sample_series2, "\n")
raw_values = sample_series2.values
print(raw_values, type(raw_values))
for position in (1, 3):
    print(raw_values[position], type(raw_values[position]))

value
a    1
b    2
c    a
d    b
dtype: object 

[1 2 'a' 'b'] <class 'numpy.ndarray'>
2 <class 'int'>
b <class 'str'>


In [12]:
# Inspect the dtype component and the type of the dtype object itself.
print("dtype")
print(sample_series2, "\n")
series_dtype = sample_series2.dtype
print(series_dtype, type(series_dtype))

dtype
a    1
b    2
c    a
d    b
dtype: object 

object <class 'numpy.dtype[object_]'>


In [13]:
# Inspect the name component before any name has been assigned.
print("name")
print(sample_series2, "\n")
current_name = sample_series2.name
print(current_name, type(current_name))

name
a    1
b    2
c    a
d    b
dtype: object 

None <class 'NoneType'>


In [14]:
# Assign a name to the Series in place and show how it appears in the repr.
sample_series2.name = "Hello"
print(sample_series2, "\n")
assigned_name = sample_series2.name
print(assigned_name, type(assigned_name))

a    1
b    2
c    a
d    b
Name: Hello, dtype: object 

Hello <class 'str'>


## 1.3 演算と関数

In [15]:
# Draw two random integer Series (four values each, drawn from 1..9).
# The generator is consumed left to right, so sample_series3 is drawn first.
sample_series3, sample_series4 = (
    pd.Series(np.random.randint(1, 10, 4)) for _draw in range(2)
)

print("sample_series3\n", sample_series3)
print("sample_series4\n", sample_series4)

# Element-wise arithmetic between the two Series: +, -, *, / in that order.
print("\n四則演算")
arithmetic_results = (
    sample_series3.add(sample_series4),
    sample_series3.sub(sample_series4),
    sample_series3.mul(sample_series4),
    sample_series3.div(sample_series4),
)
for arithmetic_result in arithmetic_results:
    print(arithmetic_result)

sample_series3
 0    1
1    2
2    8
3    7
dtype: int64
sample_series4
 0    3
1    5
2    6
3    3
dtype: int64

四則演算
0     4
1     7
2    14
3    10
dtype: int64
0   -2
1   -3
2    2
3    4
dtype: int64
0     3
1    10
2    48
3    21
dtype: int64
0    0.333333
1    0.400000
2    1.333333
3    2.333333
dtype: float64


In [16]:
# Show the input Series, then apply NumPy element-wise functions to it.
print(sample_series1)

print("\n数学の関数")
for unary_func in (np.abs, np.sqrt, np.log, np.exp):
    print(unary_func(sample_series1))
# np.power takes the exponent as a second argument, so it is called separately.
print(np.power(sample_series1, 2))

0    6
1    9
2    6
3    1
dtype: int64

数学の関数
0    6
1    9
2    6
3    1
dtype: int64
0    2.44949
1    3.00000
2    2.44949
3    1.00000
dtype: float64
0    1.791759
1    2.197225
2    1.791759
3    0.000000
dtype: float64
0     403.428793
1    8103.083928
2     403.428793
3       2.718282
dtype: float64
0    36
1    81
2    36
3     1
dtype: int64


In [17]:
# Summary statistics computed with NumPy functions on the Series.
print("\n統計量に関する関数")
for reducer in (np.mean, np.std, np.sum, np.median):
    print(reducer(sample_series1))

# Smallest and largest value.
print("\n最小値と最大値")
for extreme in (np.min, np.max):
    print(extreme(sample_series1))

# Location of the smallest and largest value as reported by argmin/argmax.
print("\n最小値と最大値のインデックス")
for locator in (np.argmin, np.argmax):
    print(locator(sample_series1))

# Functions whose result is an array: distinct values and sorted values.
print("\n特殊な関数")
for array_func in (np.unique, np.sort):
    print(array_func(sample_series1))


統計量に関する関数
5.5
2.8722813232690143
22
6.0

最小値と最大値
1
9

最小値と最大値のインデックス
3
1

特殊な関数
[1 6 9]
[1 6 6 9]


# 2 DataFrame

## 2.1 DataFrameの作成

In [18]:
# Build a small score table from a dict of equal-length arrays.
sample_dict = {
    "StudentID": np.arange(10, 14),
    "Japanese": np.random.randint(1, 100, 4),
    "Math": np.random.randint(1, 100, 4),
}
sample_df = pd.DataFrame(sample_dict)
print(sample_df, "\n", type(sample_df))

# A single column can be reached with bracket syntax or attribute syntax.
for id_column in (sample_df["StudentID"], sample_df.StudentID):
    print(id_column, "\n", type(id_column))

   StudentID  Japanese  Math
0         10        85    15
1         11        12    51
2         12        29    69
3         13        30    88 
 <class 'pandas.core.frame.DataFrame'>
0    10
1    11
2    12
3    13
Name: StudentID, dtype: int64 
 <class 'pandas.core.series.Series'>
0    10
1    11
2    12
3    13
Name: StudentID, dtype: int64 
 <class 'pandas.core.series.Series'>


## 2.2 参照

In [19]:
# Preview the first three rows of the frame.
sample_df.head(3)

Unnamed: 0,StudentID,Japanese,Math
0,10,85,15
1,11,12,51
2,12,29,69


In [20]:
# Selecting rows and columns: by label with .loc, by integer position with .iloc.
row_label = 1
score_columns = ["Japanese", "Math"]
print("行指定\n", sample_df.loc[row_label])
print("\n列指定\n", sample_df.loc[row_label, "StudentID"])
print("\n複数列指定\n", sample_df.loc[row_label, score_columns])

print("\n数値で指定\n", sample_df.iloc[1, 2])
print("\nスライス\n", sample_df.iloc[1, 1:3])

行指定
 StudentID    11
Japanese     12
Math         51
Name: 1, dtype: int64

列指定
 11

複数列指定
 Japanese    12
Math        51
Name: 1, dtype: int64

数値で指定
 51

スライス
 Japanese    12
Math        51
Name: 1, dtype: int64


In [21]:
# Index and columns objects of the frame, with their types.
frame_index = sample_df.index
frame_columns = sample_df.columns
print("\nIndex", frame_index, type(frame_index))
print("\nColumn", frame_columns, type(frame_columns))

# Conditional extraction: keep rows whose Japanese score is above 50.
japanese_above_50 = sample_df["Japanese"] > 50
print("\n", sample_df[japanese_above_50])
print("\n", sample_df.loc[japanese_above_50, "Math"])


Index RangeIndex(start=0, stop=4, step=1) <class 'pandas.core.indexes.range.RangeIndex'>

Column Index(['StudentID', 'Japanese', 'Math'], dtype='object') <class 'pandas.core.indexes.base.Index'>

    StudentID  Japanese  Math
0         10        85    15

 0    15
Name: Math, dtype: int64


## 2.3 代入と変更

In [22]:
# Display the frame as it stands before the assignment examples.
sample_df

Unnamed: 0,StudentID,Japanese,Math
0,10,85,15
1,11,12,51
2,12,29,69
3,13,30,88


In [23]:
# Start from a freshly drawn score table so the assignments below are self-contained.
sample_dict = {
    "StudentID": np.arange(10, 14),
    "Japanese": np.random.randint(1, 100, 4),
    "Math": np.random.randint(1, 100, 4),
}
sample_df = pd.DataFrame(sample_dict)

# Overwrite a whole row: first with a scalar, then with one value per column.
print("行の指定")
for row_value in (0, [1, 2, 3]):
    sample_df.loc[0] = row_value
    print(sample_df)

# Overwrite a whole column: attribute syntax with a scalar, bracket syntax with an array.
print("\n列の指定")
sample_df.Math = -1
print(sample_df)
sample_df["Math"] = np.arange(4)
print(sample_df)

# Overwrite by condition: every column of the matching rows, then only one column.
print("\n条件の指定")
japanese_above_10 = sample_df["Japanese"] > 10
sample_df[japanese_above_10] = 100
print(sample_df)
# The mask is recomputed because the previous assignment rewrote the Japanese column.
japanese_above_10 = sample_df["Japanese"] > 10
sample_df.loc[japanese_above_10, "Math"] = -100
print(sample_df)

# Assigning to a column name that does not exist yet appends a new column.
print("\n列の追加")
sample_df["English"] = np.random.randint(1, 100, 4)
print(sample_df)

行の指定
   StudentID  Japanese  Math
0          0         0     0
1         11        95    10
2         12        97     8
3         13        87    64
   StudentID  Japanese  Math
0          1         2     3
1         11        95    10
2         12        97     8
3         13        87    64

列の指定
   StudentID  Japanese  Math
0          1         2    -1
1         11        95    -1
2         12        97    -1
3         13        87    -1
   StudentID  Japanese  Math
0          1         2     0
1         11        95     1
2         12        97     2
3         13        87     3

条件の指定
   StudentID  Japanese  Math
0          1         2     0
1        100       100   100
2        100       100   100
3        100       100   100
   StudentID  Japanese  Math
0          1         2     0
1        100       100  -100
2        100       100  -100
3        100       100  -100

列の追加
   StudentID  Japanese  Math  English
0          1         2     0       62
1        100       100  -100  

## 2.4 関数

In [24]:
# Draw a fresh score table for the function examples in this section.
sample_dict = {
    "StudentID": np.arange(10, 14),
    "Japanese": np.random.randint(1, 100, 4),
    "Math": np.random.randint(1, 100, 4),
}
sample_df = pd.DataFrame(data=sample_dict)

In [25]:
# Display the freshly built frame used by the function examples.
sample_df

Unnamed: 0,StudentID,Japanese,Math
0,10,1,89
1,11,61,14
2,12,82,48
3,13,9,73


In [26]:
# Apply a NumPy function to every element of the frame.
print(np.log(sample_df))

# Apply it to one selected row and to one selected column.
row_1 = sample_df.loc[1]
math_scores = sample_df["Math"]
print("\n", np.log(row_1))
print("\n", np.log(math_scores))

# Reduce along an axis.
# NOTE(review): both calls below reduce over axis 0 (the default), so they print
# the same per-column maxima; axis=1 would be needed for a per-row maximum.
print("\n", sample_df.max())
print("\n", sample_df.max(axis=0))


# Apply a user-defined function.
def sample_func(x):
    """Return ``x + 3``; pandas objects are shifted element-wise."""
    return x + 3


print("\n", sample_func(sample_df))
print("\n", sample_func(math_scores))

   StudentID  Japanese      Math
0   2.302585  0.000000  4.488636
1   2.397895  4.110874  2.639057
2   2.484907  4.406719  3.871201
3   2.564949  2.197225  4.290459

 StudentID    2.397895
Japanese     4.110874
Math         2.639057
Name: 1, dtype: float64

 0    4.488636
1    2.639057
2    3.871201
3    4.290459
Name: Math, dtype: float64

 StudentID    13
Japanese     82
Math         89
dtype: int64

 StudentID    13
Japanese     82
Math         89
dtype: int64

    StudentID  Japanese  Math
0         13         4    92
1         14        64    17
2         15        85    51
3         16        12    76

 0    92
1    17
2    51
3    76
Name: Math, dtype: int64


## 2.5 csv

In [27]:
# Build a 100-row table with some missing Japanese scores and some missing classes.
# The random draws happen in the same order as the dict entries they feed:
# Japanese scores, Japanese missing-mask, Math, Sex, Class.
n_students = 100
japanese_scores = np.random.randint(1, 100, n_students)
# Multiplying by NaN blanks a score; the mask picks NaN with probability 0.1.
japanese_keep_mask = np.random.choice([np.nan, 1], n_students, p=[0.1, 0.9])
sample_dict2 = {
    "StudentID": np.arange(10, 10 + n_students),
    "Japanese": japanese_scores * japanese_keep_mask,
    "Math": np.random.randint(1, 100, n_students),
    "Sex": np.random.choice(["Male", "Female"], n_students),
    "Class": np.random.choice(["A", "B", "C", None], n_students),
}
sample_df2 = pd.DataFrame(sample_dict2)

In [28]:
# Preview the first rows of the generated table.
sample_df2.head()

Unnamed: 0,StudentID,Japanese,Math,Sex,Class
0,10,31.0,11,Female,B
1,11,72.0,69,Male,C
2,12,4.0,24,Male,B
3,13,71.0,15,Female,C
4,14,22.0,64,Male,A


In [29]:
# Write the generated table to CSV so the following cells can read it back.
# DataFrame.to_csv does not create missing parent directories, so make sure
# "csv_data/" exists first; otherwise this cell raises OSError on a fresh
# checkout and Restart & Run All stops here.
csv_path = Path("csv_data/sample_df.csv")
csv_path.parent.mkdir(parents=True, exist_ok=True)
# index_label=False is kept as in the original call: the index values are still
# written, but the header row gets no field for the index column.
sample_df2.to_csv(csv_path, index_label=False)

In [30]:
# Read the CSV file written by the to_csv cell back into a new DataFrame.
read_df = pd.read_csv("csv_data/sample_df.csv")

In [31]:
# Preview the first rows of the frame loaded from disk.
read_df.head()

Unnamed: 0,StudentID,Japanese,Math,Sex,Class
0,10,31.0,11,Female,B
1,11,72.0,69,Male,C
2,12,4.0,24,Male,B
3,13,71.0,15,Female,C
4,14,22.0,64,Male,A
