In [1]:
import requests
from bs4 import BeautifulSoup
import sqlite3

# Step 1: Scrape data
# Download the country listing page and pull name / population / area out of
# every <div class="country"> element.
url = "https://scrapethissite.com/pages/simple/"
# A timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() stops on an HTTP error instead of parsing an error page.
response = requests.get(url, timeout=30)
response.raise_for_status()
soup = BeautifulSoup(response.content, 'html.parser')

countries = []
for country in soup.find_all("div", class_="country"):
    name = country.find("h3", class_="country-name").text.strip()
    population = country.find("span", class_="country-population").text.strip()
    area = country.find("span", class_="country-area").text.strip()
    # Strip thousands separators before converting to numbers.
    countries.append((name, int(population.replace(',', '')), float(area.replace(',', ''))))

# Fail before touching the database: an empty result means the page layout
# changed, and the existing table must not be wiped in that case.
if not countries:
    raise ValueError(f"No country entries found at {url}")

# Step 2: Save data to SQLite
conn = sqlite3.connect('countries.db')
try:
    cursor = conn.cursor()

    cursor.execute('''
        CREATE TABLE IF NOT EXISTS countries (
            name TEXT,
            population INTEGER,
            area REAL
        )
    ''')

    # The table survives between runs, so clear it first; otherwise every
    # re-run of this cell appends a duplicate copy of each country.
    cursor.execute('DELETE FROM countries')
    cursor.executemany('INSERT INTO countries (name, population, area) VALUES (?, ?, ?)', countries)
    conn.commit()
finally:
    # Always release the database handle, even if a statement above failed
    # (uncommitted changes are rolled back on close).
    conn.close()


In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error

# Load data from SQLite
conn = sqlite3.connect('countries.db')
try:
    # DISTINCT guards against duplicate rows left by repeated runs of the
    # scraping cell; duplicates would put the same sample in both the train
    # and test splits and skew the evaluation.
    df = pd.read_sql_query('SELECT DISTINCT name, population, area FROM countries', conn)
finally:
    # Close the connection even if the query fails.
    conn.close()

# Step 1: Preprocessing
X = df[['population']]  # Features (population)
y = df['area']          # Target (area)

# Step 2: Split data
# Fixed random_state keeps the 80/20 split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 3: Train the model
model = LinearRegression()
model.fit(X_train, y_train)

# Step 4: Predict and evaluate
y_pred = model.predict(X_test)
print("Mean Absolute Error:", mean_absolute_error(y_test, y_pred))
print("Mean Squared Error:", mean_squared_error(y_test, y_pred))

# Example prediction
population_input = 1000000  # Hypothetical population
# The model was fitted on a DataFrame with a 'population' column, so predict
# with the same column name; a bare nested list triggers scikit-learn's
# "X does not have valid feature names" warning.
predicted_area = model.predict(pd.DataFrame({'population': [population_input]}))
print(f"Predicted Area for population {population_input}: {predicted_area[0]:.2f}")


Mean Absolute Error: 742849.1499890978
Mean Squared Error: 2379391410530.9595
Predicted Area for population 1000000: 418169.37


