In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

# URL de la página web
url = "https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/IBMDeveloperSkillsNetwork-PY0220EN-SkillsNetwork/labs/project/netflix_data_webpage.html"

# Hacer la solicitud a la página
try:
    print("Haciendo la solicitud a la página...")
    response = requests.get(url)
    response.raise_for_status()  # Verifica si la solicitud fue exitosa
    print("Solicitud exitosa.")
except requests.exceptions.RequestException as e:
    print(f"Error al hacer la solicitud: {e}")
    exit()

# Parsear el contenido HTML
soup = BeautifulSoup(response.content, 'html.parser')

# Encontrar la tabla
print("Buscando la tabla en la página...")
table = soup.find('table')
if table is None:
    print("No se encontró la tabla.")
    exit()
else:
    print("Tabla encontrada.")

# Inicializar listas para almacenar datos
dates = []
opens = []
highs = []
lows = []
closes = []
adj_closes = []
volumes = []

# Iterar sobre las filas de la tabla
print("Extrayendo datos de la tabla...")
for row in table.find_all('tr')[1:]:  # Salta el encabezado
    cols = row.find_all('td')
    if len(cols) >= 7:  # Asegúrate de que hay al menos 7 columnas
        dates.append(cols[0].text)
        opens.append(cols[1].text)
        highs.append(cols[2].text)
        lows.append(cols[3].text)
        closes.append(cols[4].text)
        adj_closes.append(cols[5].text)
        volumes.append(cols[6].text)

# Crear un DataFrame de pandas
df = pd.DataFrame({
    'Date': dates,
    'Open': opens,
    'High': highs,
    'Low': lows,
    'Close': closes,
    'Adj Close': adj_closes,
    'Volume': volumes
})

# Mostrar el DataFrame
print("Datos extraídos:")
print(df)

Haciendo la solicitud a la página...
Solicitud exitosa.
Buscando la tabla en la página...
Tabla encontrada.
Extrayendo datos de la tabla...
Datos extraídos:
            Date    Open    High     Low   Close Adj Close       Volume
0   Jun 01, 2021  504.01  536.13  482.14  528.21    528.21   78,560,600
1   May 01, 2021  512.65  518.95  478.54  502.81    502.81   66,927,600
2   Apr 01, 2021  529.93  563.56  499.00  513.47    513.47  111,573,300
3   Mar 01, 2021  545.57  556.99  492.85  521.66    521.66   90,183,900
4   Feb 01, 2021  536.79  566.65  518.28  538.85    538.85   61,902,300
..           ...     ...     ...     ...     ...       ...          ...
65  Jan 01, 2016  109.00  122.18   90.11   91.84     91.84  488,193,200
66  Dec 01, 2015  124.47  133.27  113.85  114.38    114.38  319,939,200
67  Nov 01, 2015  109.20  126.60  101.86  123.33    123.33  320,321,800
68  Oct 01, 2015  102.91  115.83   96.26  108.38    108.38  446,204,400
69  Sep 01, 2015  109.35  111.24   93.55  103.26   

In [2]:
df.to_csv('datos_netflix.csv', index=False)

In [3]:
import pandas as pd

# Cargar el archivo CSV
df_cargado = pd.read_csv('datos_netflix.csv')

# Mostrar las primeras filas del DataFrame
print(df_cargado.head())

           Date    Open    High     Low   Close  Adj Close       Volume
0  Jun 01, 2021  504.01  536.13  482.14  528.21     528.21   78,560,600
1  May 01, 2021  512.65  518.95  478.54  502.81     502.81   66,927,600
2  Apr 01, 2021  529.93  563.56  499.00  513.47     513.47  111,573,300
3  Mar 01, 2021  545.57  556.99  492.85  521.66     521.66   90,183,900
4  Feb 01, 2021  536.79  566.65  518.28  538.85     538.85   61,902,300


In [4]:
print(df_cargado.describe())

             Open        High         Low       Close   Adj Close
count   70.000000   70.000000   70.000000   70.000000   70.000000
mean   280.746000  307.922286  260.706714  286.039571  286.039571
std    145.711699  158.539029  136.575328  146.445067  146.445067
min     90.410000   97.480000   79.950000   90.030000   90.030000
25%    141.610000  146.535000  138.360000  143.550000  143.550000
50%    292.345000  330.315000  264.110000  294.550000  294.550000
75%    373.875000  393.377500  342.420000  373.232500  373.232500
max    545.570000  593.290000  518.280000  540.730000  540.730000


In [9]:
print(df_cargado.columns)

Index(['Date', 'Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume'], dtype='object')


In [10]:
print(df_cargado.head())

           Date    Open    High     Low   Close  Adj Close       Volume
0  Jun 01, 2021  504.01  536.13  482.14  528.21     528.21   78,560,600
1  May 01, 2021  512.65  518.95  478.54  502.81     502.81   66,927,600
2  Apr 01, 2021  529.93  563.56  499.00  513.47     513.47  111,573,300
3  Mar 01, 2021  545.57  556.99  492.85  521.66     521.66   90,183,900
4  Feb 01, 2021  536.79  566.65  518.28  538.85     538.85   61,902,300


In [11]:
df_cargado.columns = df_cargado.columns.str.strip()

In [13]:
print(df_cargado.dtypes)

Date          object
Open         float64
High         float64
Low          float64
Close        float64
Adj Close    float64
Volume        object
dtype: object


In [15]:
df_cargado['Close'] = pd.to_numeric(df_cargado['Close'], errors='coerce')

In [16]:
datos_filtrados = df_cargado[df_cargado['Close']>300]
print(datos_filtrados)

            Date    Open    High     Low   Close  Adj Close       Volume
0   Jun 01, 2021  504.01  536.13  482.14  528.21     528.21   78,560,600
1   May 01, 2021  512.65  518.95  478.54  502.81     502.81   66,927,600
2   Apr 01, 2021  529.93  563.56  499.00  513.47     513.47  111,573,300
3   Mar 01, 2021  545.57  556.99  492.85  521.66     521.66   90,183,900
4   Feb 01, 2021  536.79  566.65  518.28  538.85     538.85   61,902,300
5   Jan 01, 2021  539.00  593.29  485.67  532.39     532.39  139,988,600
6   Dec 01, 2020  492.34  545.50  491.29  540.73     540.73   77,564,100
7   Nov 01, 2020  478.87  518.73  463.41  490.70     490.70   91,788,900
8   Oct 01, 2020  506.03  572.49  472.21  475.74     475.74  154,302,400
9   Sep 01, 2020  532.60  557.39  458.60  500.03     500.03  118,796,900
10  Aug 01, 2020  490.86  549.04  466.55  529.56     529.56  116,261,900
11  Jul 01, 2020  454.00  575.37  454.00  488.88     488.88  232,206,300
12  Jun 01, 2020  418.83  474.01  404.25  455.04   

In [18]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

# Paso 1: Enviar la solicitud HTTP a la página web
url = "https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/IBMDeveloperSkillsNetwork-PY0220EN-SkillsNetwork/labs/project/netflix_data_webpage.html"
response = requests.get(url)

# Paso 2: Parsear el contenido HTML de la página web
soup = BeautifulSoup(response.text, 'html.parser')

# Paso 3: Encontrar el cuerpo de la tabla
table_body = soup.find("tbody")

# Paso 4: Inicializar listas para almacenar los datos
dates = []
opens = []
highs = []
lows = []
closes = []
adj_closes = []
volumes = []

# Paso 5: Recorrer cada fila de la tabla y extraer los valores de las columnas
for row in table_body.find_all('tr'):
    col = row.find_all("td")
    
    # Verificar que hay suficientes columnas
    if len(col) >= 7:
        date = col[0].text.strip()
        open_price = col[1].text.strip()
        high = col[2].text.strip()
        low = col[3].text.strip()
        close = col[4].text.strip()
        adj_close = col[5].text.strip()
        volume = col[6].text.strip()
        
        # Agregar datos a las listas
        dates.append(date)
        opens.append(open_price)
        highs.append(high)
        lows.append(low)
        closes.append(close)
        adj_closes.append(adj_close)
        volumes.append(volume)

# Paso 6: Crear un DataFrame de pandas con los datos extraídos
netflix_data = pd.DataFrame({
    "Date": dates,
    "Open": opens,
    "High": highs,
    "Low": lows,
    "Close": closes,
    "Adj Close": adj_closes,
    "Volume": volumes
})

# Paso 7: Mostrar las primeras filas del DataFrame
print(netflix_data.head())

# Paso 8: Guardar el DataFrame en un archivo CSV
netflix_data.to_csv('datos-netflix.csv', index=False)

           Date    Open    High     Low   Close Adj Close       Volume
0  Jun 01, 2021  504.01  536.13  482.14  528.21    528.21   78,560,600
1  May 01, 2021  512.65  518.95  478.54  502.81    502.81   66,927,600
2  Apr 01, 2021  529.93  563.56  499.00  513.47    513.47  111,573,300
3  Mar 01, 2021  545.57  556.99  492.85  521.66    521.66   90,183,900
4  Feb 01, 2021  536.79  566.65  518.28  538.85    538.85   61,902,300


In [19]:
netflix_data.head()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,"Jun 01, 2021",504.01,536.13,482.14,528.21,528.21,78560600
1,"May 01, 2021",512.65,518.95,478.54,502.81,502.81,66927600
2,"Apr 01, 2021",529.93,563.56,499.0,513.47,513.47,111573300
3,"Mar 01, 2021",545.57,556.99,492.85,521.66,521.66,90183900
4,"Feb 01, 2021",536.79,566.65,518.28,538.85,538.85,61902300


In [20]:
netflix_data.tail()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
65,"Jan 01, 2016",109.0,122.18,90.11,91.84,91.84,488193200
66,"Dec 01, 2015",124.47,133.27,113.85,114.38,114.38,319939200
67,"Nov 01, 2015",109.2,126.6,101.86,123.33,123.33,320321800
68,"Oct 01, 2015",102.91,115.83,96.26,108.38,108.38,446204400
69,"Sep 01, 2015",109.35,111.24,93.55,103.26,103.26,497401200
