# Pandas DataFrame과 Series

In [1]:
import pandas as pd

In [2]:
# Build a labelled Series. `index` supplies the row labels and `name` names the
# Series; without `index`, pandas would fall back to the integer labels 0, 1.
pd.Series(
    data=['banana', 42],
    index=['name', 'age'],
    name='kind',
)

name    banana
age         42
Name: kind, dtype: object

In [3]:
# Build a DataFrame column by column: each key becomes a column name and each
# list holds that column's values (one entry per row).
# NOTE(review): 'Framklin' looks like a typo for 'Franklin', and the Age values
# are strings rather than integers — confirm whether that is intended.
pd.DataFrame(
    data=dict(
        Name=['Framklin', 'Gosset'],
        Born=['1920-07-25', '1876-06-03'],
        Age=['37', '61'],
    )
)

Unnamed: 0,Name,Born,Age
0,Framklin,1920-07-25,37
1,Gosset,1876-06-03,61


In [4]:
# Build a DataFrame row by row: each inner list is one row, and `columns`
# supplies the column labels.
pd.DataFrame(
    data=[
        [1, 2, 3],
        [4, 5, 6],
        [7, 8, 9],
    ],
    columns=list('abc'),
)

Unnamed: 0,a,b,c
0,1,2,3
1,4,5,6
2,7,8,9


In [5]:
# Load the scientists dataset (path is relative to the working directory)
# and preview its first two rows.
df = pd.read_csv(filepath_or_buffer='data/scientists.csv')
df.head(n=2)

Unnamed: 0,Name,Born,Died,Age,Occupation
0,Rosaline Franklin,1920-07-25,1958-04-16,37,Chemist
1,William Gosset,1876-06-13,1937-10-16,61,Statistician


In [20]:
# Preview the last two rows of the frame.
df.tail(n=2)

Unnamed: 0,Name,Born,Died,Age,Occupation
6,Alan Turing,1912-06-23,1954-06-07,41,Computer Scientist
7,Johann Gauss,1777-04-30,1855-02-23,77,Mathematician


In [10]:
# Sort by Name in descending order, using Age (also descending) to break ties.
# This returns a new frame; `df` keeps its original row order.
df.sort_values(
    by=['Name', 'Age'],
    ascending=False,
)

Unnamed: 0,Name,Born,Died,Age,Occupation
1,William Gosset,1876-06-13,1937-10-16,61,Statistician
0,Rosaline Franklin,1920-07-25,1958-04-16,37,Chemist
4,Rachel Carson,1907-05-27,1964-04-14,56,Biologist
3,Marie Curie,1867-11-07,1934-07-04,66,Chemist
5,John Snow,1813-03-15,1858-06-16,45,Physician
7,Johann Gauss,1777-04-30,1855-02-23,77,Mathematician
2,Florence Nightingale,1820-05-12,1910-08-13,90,Nurse
6,Alan Turing,1912-06-23,1954-06-07,41,Computer Scientist


In [12]:
# Select the Age column as a Series, then convert it to a one-column DataFrame.
df.loc[:, 'Age'].to_frame()

Unnamed: 0,Age
0,37
1,61
2,90
3,66
4,56
5,45
6,41
7,77


In [13]:
# Arithmetic mean of the Age column.
df.loc[:, 'Age'].mean()

59.125

In [14]:
# df[condition]: a boolean Series (True/False per row) selects the rows marked True.
# Here the condition keeps the rows whose Age is at or above the mean Age.
df[df['Age'].ge(df['Age'].mean())]

Unnamed: 0,Name,Born,Died,Age,Occupation
1,William Gosset,1876-06-13,1937-10-16,61,Statistician
2,Florence Nightingale,1820-05-12,1910-08-13,90,Nurse
3,Marie Curie,1867-11-07,1934-07-04,66,Chemist
7,Johann Gauss,1777-04-30,1855-02-23,77,Mathematician


In [17]:
# A plain list of booleans works as a row mask too: it needs one entry per row
# (eight here), and the rows at the True positions are kept — every other row.
df[[True, False] * 4]

Unnamed: 0,Name,Born,Died,Age,Occupation
0,Rosaline Franklin,1920-07-25,1958-04-16,37,Chemist
2,Florence Nightingale,1820-05-12,1910-08-13,90,Nurse
4,Rachel Carson,1907-05-27,1964-04-14,56,Biologist
6,Alan Turing,1912-06-23,1954-06-07,41,Computer Scientist


In [20]:
# Adding a scalar is broadcast to every element of the Series.
df['Age'].add(100)

0    137
1    161
2    190
3    166
4    156
5    145
6    141
7    177
Name: Age, dtype: int64

In [19]:
# Series-to-Series arithmetic aligns on index labels: only labels 0 and 1 exist
# in both operands, so every other label in the result is NaN.
df['Age'].add(pd.Series([1, 100]))

0     38.0
1    161.0
2      NaN
3      NaN
4      NaN
5      NaN
6      NaN
7      NaN
dtype: float64

In [23]:
# The Age column reordered so that its index labels run from highest to lowest.
rev_age = (
    df['Age']
    .sort_index(ascending=False)
)

In [24]:
# Although rev_age is stored in reverse order, addition matches values by index
# label rather than by position, so each age ends up added to itself.
rev_age.add(df['Age'])

0     74
1    122
2    180
3    132
4    112
5     90
6     82
7    154
Name: Age, dtype: int64

In [25]:
# Print a summary of the frame: index range, per-column non-null counts and
# dtypes, and approximate memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8 entries, 0 to 7
Data columns (total 5 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   Name        8 non-null      object
 1   Born        8 non-null      object
 2   Died        8 non-null      object
 3   Age         8 non-null      int64 
 4   Occupation  8 non-null      object
dtypes: int64(1), object(4)
memory usage: 448.0+ bytes


In [30]:
# Parse the 'Born' strings (YYYY-MM-DD) into datetime values.
born_datetime = pd.to_datetime(
    arg=df['Born'],
    format='%Y-%m-%d',
)

In [31]:
# Parse the 'Died' strings (YYYY-MM-DD) into datetime values.
died_datetime = pd.to_datetime(
    arg=df['Died'],
    format='%Y-%m-%d',
)

In [32]:
# Attach the parsed datetime Series to `df` as two new columns.
# This modifies `df` in place; the cells below rely on these columns.
df['born_dt'] = born_datetime
df['died_dt'] = died_datetime

In [34]:
# Subtracting two datetime columns yields a timedelta Series (days lived).
df['died_dt'].sub(df['born_dt'])

0   13779 days
1   22404 days
2   32964 days
3   24345 days
4   20777 days
5   16529 days
6   15324 days
7   28422 days
dtype: timedelta64[ns]

In [37]:
# `format` may contain literal text around the directives, so a Korean-style
# date string (year/month/day suffixes) can be parsed directly.
pd.to_datetime(
    arg='2023년 12월 8일',
    format='%Y년 %m월 %d일',
)

Timestamp('2023-12-08 00:00:00')

In [42]:
# Return a copy of the frame without the row labelled 1; `df` is not modified.
df.drop(index=1)

Unnamed: 0,Name,Born,Died,Age,Occupation,born_dt,died_dt
0,Rosaline Franklin,1920-07-25,1958-04-16,37,Chemist,1920-07-25,1958-04-16
2,Florence Nightingale,1820-05-12,1910-08-13,90,Nurse,1820-05-12,1910-08-13
3,Marie Curie,1867-11-07,1934-07-04,66,Chemist,1867-11-07,1934-07-04
4,Rachel Carson,1907-05-27,1964-04-14,56,Biologist,1907-05-27,1964-04-14
5,John Snow,1813-03-15,1858-06-16,45,Physician,1813-03-15,1858-06-16
6,Alan Turing,1912-06-23,1954-06-07,41,Computer Scientist,1912-06-23,1954-06-07
7,Johann Gauss,1777-04-30,1855-02-23,77,Mathematician,1777-04-30,1855-02-23


In [43]:
# Rows and columns can be dropped in a single call: remove the row labelled 3
# and the 'Died' column. The result is a new frame; `df` is not modified.
df.drop(
    index=[3],
    columns=['Died'],
)

Unnamed: 0,Name,Born,Age,Occupation,born_dt,died_dt
0,Rosaline Franklin,1920-07-25,37,Chemist,1920-07-25,1958-04-16
1,William Gosset,1876-06-13,61,Statistician,1876-06-13,1937-10-16
2,Florence Nightingale,1820-05-12,90,Nurse,1820-05-12,1910-08-13
4,Rachel Carson,1907-05-27,56,Biologist,1907-05-27,1964-04-14
5,John Snow,1813-03-15,45,Physician,1813-03-15,1858-06-16
6,Alan Turing,1912-06-23,41,Computer Scientist,1912-06-23,1954-06-07
7,Johann Gauss,1777-04-30,77,Mathematician,1777-04-30,1855-02-23


In [44]:
# Write the frame (including the born_dt / died_dt columns added above) to
# test.csv in the working directory. With the default settings the row index
# is written as the first, unnamed column.
df.to_csv(path_or_buf='test.csv')