# CSV檔案的讀取

In [None]:
import pandas as pd

# Parse the CSV file that sits next to the notebook into a DataFrame.
csv_path = 'covid19.csv'
df = pd.read_csv(csv_path)

# A bare expression on the last line lets Jupyter render the frame as a table.
df

Unnamed: 0,country_ch,country_en,cases,deaths
0,美國,United States,76407539,923087
1,印度,India,42272014,502874
2,巴西,Brazil,26599593,632621
3,法國,France,20804372,132923
4,英國,United Kingdom,17866632,158363
...,...,...,...,...
193,東加,Tonga,8,0
194,萬那杜,Vanuatu,7,1
195,馬紹爾群島,Marshall Islands,7,0
196,密克羅尼西亞聯邦,Micronesia,1,0


# JSON資料的讀取

In [None]:
import pandas as pd

# Parse the JSON file that sits next to the notebook into a DataFrame.
json_path = 'covid19.json'
df = pd.read_json(json_path)

# A bare expression on the last line lets Jupyter render the frame as a table.
df

Unnamed: 0,country_ch,country_en,cases,deaths
0,美國,United States,34516883,622158
1,印度,India,31371901,420551
2,巴西,Brazil,19688663,549924
3,俄羅斯,Russia,6126541,153874
4,法國,France,5993937,111644
...,...,...,...,...
190,萬那杜,Vanuatu,4,1
191,馬紹爾群島,Marshall Islands,4,0
192,帛琉,Palau,2,0
193,薩摩亞,Samoa,1,0


# Excel試算表檔案的讀取

In [None]:
import pandas as pd

# Parse the Excel workbook that sits next to the notebook into a DataFrame.
# With no sheet specified, pandas reads the first worksheet.
xlsx_path = 'covid19.xlsx'
df = pd.read_excel(xlsx_path)

# A bare expression on the last line lets Jupyter render the frame as a table.
df

Unnamed: 0,country_ch,country_en,cases,deaths
0,美國,United States,76407539,923087
1,印度,India,42272014,502874
2,巴西,Brazil,26599593,632621
3,法國,France,20804372,132923
4,英國,United Kingdom,17866632,158363
...,...,...,...,...
193,東加,Tonga,8,0
194,萬那杜,Vanuatu,7,1
195,馬紹爾群島,Marshall Islands,7,0
196,密克羅尼西亞聯邦,Micronesia,1,0


# HTML網頁資料讀取

In [None]:
import pandas as pd

url = 'https://www.tiobe.com/tiobe-index/'

# read_html returns a list holding one DataFrame per <table> found on the page.
# keep_default_na=False leaves empty cells as empty strings rather than NaN.
tables = pd.read_html(url, keep_default_na=False)

# Show the first ten rows of the first table on the page.
first_table = tables[0]
first_table.head(10)

Unnamed: 0,Feb 2022,Feb 2021,Change,Programming Language,Programming Language.1,Ratings,Change.1
0,1,3,,,Python,15.33%,+4.47%
1,2,1,,,C,14.08%,-2.26%
2,3,2,,,Java,12.13%,+0.84%
3,4,4,,,C++,8.01%,+1.13%
4,5,5,,,C#,5.37%,+0.93%
5,6,6,,,Visual Basic,5.23%,+0.90%
6,7,7,,,JavaScript,1.83%,-0.45%
7,8,8,,,PHP,1.79%,+0.04%
8,9,10,,,Assembly language,1.60%,-0.06%
9,10,9,,,SQL,1.55%,-0.18%


# 儲存資料為檔案

In [None]:
import pandas as pd

# Student roster; the order here becomes the row order of the DataFrame.
students = ['王小明', '李小美', '陳大同', '林小玉']

# Marks per subject, listed in the same order as `students`.
marks = {
    '國文': [65, 90, 81, 79],
    '英文': [92, 72, 85, 53],
    '數學': [78, 76, 91, 47],
    '自然': [83, 93, 89, 94],
    '社會': [70, 56, 94, 80],
}

# Nested mapping {subject: {student: mark}}: outer keys become columns,
# inner keys become the index.
scores = {subject: dict(zip(students, values)) for subject, values in marks.items()}

df = pd.DataFrame(scores)
# Write the table (index included) to scores.csv in the working directory.
df.to_csv('scores.csv')

# requests模組：讀取網站檔案

In [None]:
import requests

# Download a demo page and print its HTML source.
url = 'http://www.ehappy.tw/demo.htm'

# requests has no default timeout; without one an unresponsive server
# would block this cell indefinitely.
r = requests.get(url, timeout=10)

# Raise on 4xx/5xx so an error page is not printed as if it were the demo page.
r.raise_for_status()

print(r.text)

<!doctype html>
<html>
  <head>
    <meta charset="UTF-8">
    <title>Hello</title>
  </head>
  <body>
    <p>Hello World!</p>
  </body>
</html>


In [None]:
import requests

# Define the query parameters as a dict and attach them to the GET request;
# requests URL-encodes them into the query string.
payload = {'key1': 'value1', 'key2': 'value2'}

# requests has no default timeout; without one an unresponsive server
# would block this cell indefinitely.
r = requests.get("http://httpbin.org/get", params=payload, timeout=10)

# Raise on 4xx/5xx so an error body is not mistaken for the echoed request.
r.raise_for_status()

print(r.text)

{
  "args": {
    "key1": "value1", 
    "key2": "value2"
  }, 
  "headers": {
    "Accept": "*/*", 
    "Accept-Encoding": "gzip, deflate", 
    "Host": "httpbin.org", 
    "User-Agent": "python-requests/2.23.0", 
    "X-Amzn-Trace-Id": "Root=1-6203982c-011d569f3be97bfa789f5494"
  }, 
  "origin": "104.199.123.139", 
  "url": "http://httpbin.org/get?key1=value1&key2=value2"
}



In [None]:
import requests

# Attach the parameters to a POST request; passing a dict as `data`
# sends them form-encoded in the request body.
payload = {'key1': 'value1', 'key2': 'value2'}

# requests has no default timeout; without one an unresponsive server
# would block this cell indefinitely.
r = requests.post("http://httpbin.org/post", data=payload, timeout=10)

# Raise on 4xx/5xx so an error body is not mistaken for the echoed request.
r.raise_for_status()

print(r.text)

{
  "args": {}, 
  "data": "", 
  "files": {}, 
  "form": {
    "key1": "value1", 
    "key2": "value2"
  }, 
  "headers": {
    "Accept": "*/*", 
    "Accept-Encoding": "gzip, deflate", 
    "Content-Length": "23", 
    "Content-Type": "application/x-www-form-urlencoded", 
    "Host": "httpbin.org", 
    "User-Agent": "python-requests/2.23.0", 
    "X-Amzn-Trace-Id": "Root=1-62039928-330ede5b7a48562338374184"
  }, 
  "json": null, 
  "origin": "104.199.123.139", 
  "url": "http://httpbin.org/post"
}



# BeautifulSoup模組：網頁解析

In [None]:
import requests
from bs4 import BeautifulSoup

url = 'http://www.ehappy.tw/bsdemo1.htm'

# requests has no default timeout; without one an unresponsive server
# would block this cell indefinitely.
html = requests.get(url, timeout=10)

# Raise on 4xx/5xx so an error page is not parsed as if it were the demo page.
html.raise_for_status()

# Force UTF-8 decoding of the body before reading .text
# (presumably the server does not declare a usable charset -- TODO confirm).
html.encoding = 'UTF-8'

sp = BeautifulSoup(html.text, 'html.parser')

print(sp.title)        # the whole <title> tag
print(sp.title.text)   # only the text inside <title>
print(sp.h1)           # the first <h1> tag in the document

<title>我是網頁標題</title>
我是網頁標題
<h1 class="large">我是標題</h1>


In [None]:
from bs4 import BeautifulSoup

# Small in-memory document used to demonstrate find() / find_all().
html = '''
<html>
  <head><meta charset="UTF-8"><title>我是網頁標題</title></head>
  <body>
      <p id="p1">我是段落一</p>
      <p id="p2" class='red'>我是段落二</p>
  </body>
</html>
'''
sp = BeautifulSoup(html, 'html.parser')

lookups = (
    sp.find('p'),                                # first <p> only
    sp.find_all('p'),                            # list of every <p>
    sp.find('p', {'id': 'p2', 'class': 'red'}),  # attribute filter given as a dict
    sp.find('p', id='p2', class_='red'),         # same filter as keyword arguments
)
for result in lookups:
    print(result)

<p id="p1">我是段落一</p>
[<p id="p1">我是段落一</p>, <p class="red" id="p2">我是段落二</p>]
<p class="red" id="p2">我是段落二</p>
<p class="red" id="p2">我是段落二</p>


In [None]:
from bs4 import BeautifulSoup

# Small in-memory document used to demonstrate CSS selectors with select().
html = '''
<html>
  <head><meta charset="UTF-8"><title>我是網頁標題</title></head>
  <body>
      <p id="p1">我是段落一</p>
      <p id="p2" class='red'>我是段落二</p>
  </body>
</html>
'''
sp = BeautifulSoup(html, 'html.parser')

# select() always returns a list of matches:
# by tag name, by tag name, by id (#...), and by class (....).
for selector in ('title', 'p', '#p1', '.red'):
    print(sp.select(selector))

[<title>我是網頁標題</title>]
[<p id="p1">我是段落一</p>, <p class="red" id="p2">我是段落二</p>]
[<p id="p1">我是段落一</p>]
[<p class="red" id="p2">我是段落二</p>]


In [None]:
# Imported here so the cell also runs on its own in a fresh kernel,
# like the other BeautifulSoup cells in this notebook.
from bs4 import BeautifulSoup

# Small in-memory document with an image and a hyperlink.
html = '''
<html>
  <head><meta charset="UTF-8"><title>我是網頁標題</title></head>
  <body>
      <img src="http://www.ehappy.tw/python.png">
      <a href="http://www.e-happy.com.tw">超連結</a>
  </body>
</html>
'''
sp = BeautifulSoup(html, 'html.parser')

img_tag = sp.find('img')
link_tag = sp.find('a')

# Use tag.get(attribute_name) to read the image and hyperlink URLs;
# get() returns None when the attribute is missing.
print(img_tag.get('src'))
print(link_tag.get('href'))

# Use tag[attribute_name] to read the same URLs;
# indexing raises KeyError when the attribute is missing.
print(img_tag['src'])
print(link_tag['href'])

http://www.ehappy.tw/python.png
http://www.e-happy.com.tw
http://www.ehappy.tw/python.png
http://www.e-happy.com.tw


# 文字及檔案資料的收集

In [6]:
# Three lines of text; the last line deliberately has no trailing newline.
content = '''Hello Python
中文字測試
Welcome'''

# newline="" writes the '\n' characters exactly as given, with no
# platform-specific translation.
f = open('file1.txt', 'w', encoding='utf-8', newline="")
try:
    f.write(content)
finally:
    # Close the handle even if write() raises, so it is never leaked.
    f.close()

In [7]:
# Three lines of text joined with '\n'; the last line has no trailing newline.
content = '\n'.join(['Hello Python', '中文字測試', 'Welcome'])

# The context manager closes the file when the block exits, even on error.
# newline="" writes the '\n' characters exactly as given.
with open('file1.txt', mode='w', encoding='utf-8', newline="") as f:
    f.write(content)

In [8]:
# read() with no argument returns the whole file as one string.
# The open mode defaults to 'r' (text, read-only).
with open('file1.txt', encoding='utf-8') as f:
    output_str = f.read()

# Prints every line of the file, not just the first one.
print(output_str)

Hello Python
中文字測試
Welcome


In [9]:
with open('file1.txt', encoding='UTF-8') as f:
    # readline() returns the next line, including its trailing '\n'.
    first_line = f.readline()
    # readline(3) returns at most 3 characters of the following line.
    next_three = f.readline(3)

# The '\n' kept in first_line plus print()'s own newline yields a blank line.
print(first_line)
print(next_three)

Hello Python

中文字


In [10]:
# readlines() returns a list with one string per line; each string keeps
# its trailing '\n' where the file has one.
with open('file1.txt', encoding='utf-8') as f:
    content = f.readlines()

# Show the container type first, then the list itself, one per output line.
print(type(content), content, sep='\n')

<class 'list'>
['Hello Python\n', '中文字測試\n', 'Welcome']


In [11]:
# Collect every line of the file into a list, then print the list.
with open('file1.txt', encoding='utf-8') as f:
    all_lines = f.readlines()

print(all_lines)

['Hello Python\n', '中文字測試\n', 'Welcome']


In [12]:
import requests

imgurl = 'https://www.google.com/images/branding/googlelogo/1x/googlelogo_color_272x92dp.png'

# requests has no default timeout; without one an unresponsive server
# would block this cell indefinitely.
r = requests.get(imgurl, timeout=10)

# Stop on 4xx/5xx: otherwise the body of an error response would be
# saved to disk under a .png name.
r.raise_for_status()

# r.content is the raw response body as bytes, so the file is opened in binary mode.
with open('google.png', 'wb') as f:
    f.write(r.content)