In [1]:
import pandas as pd

# 数据的导入和导出

## CSV 文件导入和导出

In [2]:
# Read the scientists CSV file into a DataFrame and display it
scientists_csv_path = './data/scientists.csv'
scientists = pd.read_csv(scientists_csv_path)
scientists

Unnamed: 0,Name,Born,Died,Age,Occupation
0,Rosaline Franklin,1920/7/25,1958/4/16,37,Chemist
1,William Gosset,1876-06-13,1937/10/16,61,Statistician
2,Florence Nightingale,1820-05-12,1910/8/13,90,Nurse
3,Marie Curie,1867-11-07,1934/7/4,66,Chemist
4,Rachel Carson,1907/5/27,1964/4/14,56,Biologist
5,John Snow,1813-03-15,1858-06-16,45,Physician
6,Alan Turing,,1954/6/7,41,Computer Scientist
7,Johann Gauss,1777-04-30,1855-02-23,77,Mathematician


In [3]:
# Read the tab-separated China data; read_csv parses TSV once sep is a tab
china_tsv_path = './data/china.tsv'
china_df = pd.read_csv(china_tsv_path, sep='\t')
china_df

Unnamed: 0,country,continent,year,lifeExp,pop,gdpPercap
0,China,Asia,1952,44.0,556263527,400.448611
1,China,Asia,1957,50.54896,637408000,575.987001
2,China,Asia,1962,44.50136,665770000,487.674018
3,China,Asia,1967,58.38112,754550000,612.705693
4,China,Asia,1972,63.11888,862030000,676.900092
5,China,Asia,1977,63.96736,943455000,741.23747
6,China,Asia,1982,65.525,1000281000,962.421381
7,China,Asia,1987,67.274,1084035000,1378.904018
8,China,Asia,1992,68.69,1164970000,1655.784158
9,China,Asia,1997,70.426,1230075000,2289.234136


In [15]:
# Export the scientists DataFrame to scientists_df.tsv as tab-separated text.
# No index argument is given, so pandas' default for writing row labels applies.
scientists.to_csv(path_or_buf='./data/scientists_df.tsv', sep='\t')

In [16]:
# Export the scientists DataFrame to scientists_df_noindex.csv.
# index=False leaves the row labels out of the file.
scientists.to_csv(path_or_buf='./data/scientists_df_noindex.csv', index=False)

## Excel 文件导入和导出

In [18]:
# Load the scientists_df.xlsx workbook, using openpyxl as the Excel reader.
# NOTE(review): no visible cell writes scientists_df.xlsx (only
# scientists_df_noindex.xlsx is exported below) — presumably the file already
# exists under ./data; confirm.
scientists_xlsx_path = './data/scientists_df.xlsx'
scientists_df = pd.read_excel(scientists_xlsx_path, engine='openpyxl')

scientists_df

Unnamed: 0,Name,Born,Died,Age,Occupation
0,Rosaline Franklin,1920/7/25,1958/4/16,37,Chemist
1,William Gosset,1876-06-13,1937/10/16,61,Statistician
2,Florence Nightingale,1820-05-12,1910/8/13,90,Nurse
3,Marie Curie,1867-11-07,1934/7/4,66,Chemist
4,Rachel Carson,1907/5/27,1964/4/14,56,Biologist
5,John Snow,1813-03-15,1858-06-16,45,Physician
6,Alan Turing,,1954/6/7,41,Computer Scientist
7,Johann Gauss,1777-04-30,1855-02-23,77,Mathematician


In [19]:
# Export the scientists DataFrame to scientists_df_noindex.xlsx.
# The data goes to a sheet named 'scientists'; index=False omits the row labels.
scientists.to_excel(
    excel_writer='./data/scientists_df_noindex.xlsx',
    sheet_name='scientists',
    index=False,
)

## SQL 表数据导入和导出

In [8]:
# Re-read the scientists CSV (same file as the first load cell) — presumably so
# the SQL section can be run on its own.
scientists = pd.read_csv(filepath_or_buffer='./data/scientists.csv')
scientists

Unnamed: 0,Name,Born,Died,Age,Occupation
0,Rosaline Franklin,1920/7/25,1958/4/16,37,Chemist
1,William Gosset,1876-06-13,1937/10/16,61,Statistician
2,Florence Nightingale,1820-05-12,1910/8/13,90,Nurse
3,Marie Curie,1867-11-07,1934/7/4,66,Chemist
4,Rachel Carson,1907/5/27,1964/4/14,56,Biologist
5,John Snow,1813-03-15,1858-06-16,45,Physician
6,Alan Turing,,1954/6/7,41,Computer Scientist
7,Johann Gauss,1777-04-30,1855-02-23,77,Mathematician


In [20]:
# Goal: save the scientists DataFrame into a MySQL table named tb_scientists.
import getpass
import os
from urllib.parse import quote_plus

# SQLAlchemy provides the database engine that pandas uses to talk to MySQL
from sqlalchemy import create_engine

# Connection settings are read from environment variables instead of being
# hard-coded in the notebook. The non-secret defaults are the values this cell
# used originally. The password has no default: if MYSQL_PASSWORD is unset,
# it is prompted for interactively so it never ends up in the saved notebook.
db_user = os.environ.get('MYSQL_USER', 'root')
db_password = os.environ.get('MYSQL_PASSWORD') or getpass.getpass('MySQL password: ')
db_host = os.environ.get('MYSQL_HOST', '127.0.0.1')
db_port = os.environ.get('MYSQL_PORT', '3306')
db_name = os.environ.get('MYSQL_DATABASE', 'john_practice')

# Create the engine from a URL-style connection string:
#   mysql+pymysql://<user>:<password>@<host>:<port>/<database>?charset=utf8
# mysql               : the database type
# pymysql             : the Python driver package used to talk to the database
# <user>:<password>   : account and password, joined by a colon
# <host>:<port>/<database> : server address and database name
#                       (note: the database itself must be created beforehand)
# charset=utf8        : the connection character encoding
# quote_plus escapes characters such as '@' or ':' in the credentials so they
# cannot break the URL structure.
engine = create_engine(
    f'mysql+pymysql://{quote_plus(db_user)}:{quote_plus(db_password)}'
    f'@{db_host}:{db_port}/{db_name}?charset=utf8'
)

# Export the DataFrame to the database.
# 1st argument        : name of the target table
# 2nd argument        : the engine used to talk to the database
# index=False         : do not export the row labels
# if_exists='append'  : append rows if the table exists; otherwise create the
#                       table and write the rows
# WARNING: because of 'append', every re-run of this cell inserts the same rows
# again. Use if_exists='replace' if the table should be rebuilt on each run.
scientists.to_sql('tb_scientists', engine, index=False, if_exists='append')

In [21]:
# Load the tb_scientists table through the SQLAlchemy engine, passing only the
# table name
pd.read_sql(sql='tb_scientists', con=engine)

Unnamed: 0,Name,Born,Died,Age,Occupation
0,Rosaline Franklin,1920/7/25,1958/4/16,37,Chemist
1,William Gosset,1876-06-13,1937/10/16,61,Statistician
2,Florence Nightingale,1820-05-12,1910/8/13,90,Nurse
3,Marie Curie,1867-11-07,1934/7/4,66,Chemist
4,Rachel Carson,1907/5/27,1964/4/14,56,Biologist
5,John Snow,1813-03-15,1858-06-16,45,Physician
6,Alan Turing,,1954/6/7,41,Computer Scientist
7,Johann Gauss,1777-04-30,1855-02-23,77,Mathematician


In [22]:
# Load only the Name and Age columns of tb_scientists by running a SQL query
name_age_query = 'SELECT Name, Age FROM tb_scientists'
pd.read_sql(name_age_query, engine)

Unnamed: 0,Name,Age
0,Rosaline Franklin,37
1,William Gosset,61
2,Florence Nightingale,90
3,Marie Curie,66
4,Rachel Carson,56
5,John Snow,45
6,Alan Turing,41
7,Johann Gauss,77


## Pickle 文件导入和导出（了解）

In [12]:
# Re-read the scientists CSV (same file as the first load cell) — presumably so
# the pickle section can be run on its own.
scientists_source = './data/scientists.csv'
scientists = pd.read_csv(scientists_source)
scientists

Unnamed: 0,Name,Born,Died,Age,Occupation
0,Rosaline Franklin,1920/7/25,1958/4/16,37,Chemist
1,William Gosset,1876-06-13,1937/10/16,61,Statistician
2,Florence Nightingale,1820-05-12,1910/8/13,90,Nurse
3,Marie Curie,1867-11-07,1934/7/4,66,Chemist
4,Rachel Carson,1907/5/27,1964/4/14,56,Biologist
5,John Snow,1813-03-15,1858-06-16,45,Physician
6,Alan Turing,,1954/6/7,41,Computer Scientist
7,Johann Gauss,1777-04-30,1855-02-23,77,Mathematician


In [23]:
# Serialize the scientists DataFrame to a pickle file
scientists.to_pickle(path='./data/scientists_df.pickle')

In [24]:
# Load the DataFrame back from scientists_df.pickle (the path the to_pickle
# cell above writes to).
# NOTE: unpickling can execute arbitrary code — only read pickle files from
# trusted sources.
pickle_path = './data/scientists_df.pickle'
scientists_df = pd.read_pickle(pickle_path)
scientists_df

Unnamed: 0,Name,Born,Died,Age,Occupation
0,Rosaline Franklin,1920/7/25,1958/4/16,37,Chemist
1,William Gosset,1876-06-13,1937/10/16,61,Statistician
2,Florence Nightingale,1820-05-12,1910/8/13,90,Nurse
3,Marie Curie,1867-11-07,1934/7/4,66,Chemist
4,Rachel Carson,1907/5/27,1964/4/14,56,Biologist
5,John Snow,1813-03-15,1858-06-16,45,Physician
6,Alan Turing,,1954/6/7,41,Computer Scientist
7,Johann Gauss,1777-04-30,1855-02-23,77,Mathematician
