In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Synthetic linear-regression dataset: y = X @ true_coefficients + unit Gaussian noise,
# followed by one scatter panel per feature with a degree-1 least-squares line.
np.random.seed(42)

num_samples = 1000
num_features = 5

# The three draws below consume the seeded global stream in this order
# (features, coefficients, noise); reordering them changes the generated data.
X = np.random.randn(num_samples, num_features)

true_coefficients = np.random.uniform(-10, 10, size=(num_features,))

y = np.dot(X, true_coefficients) + np.random.randn(num_samples)

# squeeze=False makes plt.subplots return a 2-D array of Axes for every grid
# size. Without it, num_features == 1 yields a bare Axes object and the
# indexing below fails. Taking column 0 restores the 1-D array of Axes.
fig, axes = plt.subplots(num_features, 1, figsize=(8, 6 * num_features), squeeze=False)
axes = axes[:, 0]

for i, ax in enumerate(axes):
    feature_values = X[:, i]
    ax.scatter(feature_values, y, s=10, label='Data')

    # Straight-line fit of the target against this single feature only.
    coeffs = np.polyfit(feature_values, y, 1)
    best_fit_line = np.poly1d(coeffs)
    x_range = np.linspace(feature_values.min(), feature_values.max(), 100)
    ax.plot(x_range, best_fit_line(x_range), color='red', label='Best Fit Line')

    ax.set_xlabel(f'Feature {i+1}')
    ax.set_ylabel('Target')
    ax.legend()

plt.tight_layout()
plt.show()


In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification

# Two-feature, two-class synthetic classification dataset and its scatter plot.
# make_classification is called without random_state, so it draws from the
# global NumPy stream seeded here.
np.random.seed(42)

num_samples = 1000
num_features = 2
num_classes = 2
num_clusters = 2

X, y = make_classification(
    n_samples=num_samples,
    n_features=num_features,
    n_informative=num_features,
    n_redundant=0,
    n_classes=num_classes,
    n_clusters_per_class=num_clusters,
)

plt.figure(figsize=(8, 6))

# One scatter call per class, in class order, so the legend lists Class 0 first.
for class_value, class_color, class_label in (
    (0, 'blue', 'Class 0'),
    (1, 'red', 'Class 1'),
):
    class_mask = y == class_value
    plt.scatter(X[class_mask, 0], X[class_mask, 1], color=class_color, label=class_label)

plt.xlabel('Feature 1')
plt.ylabel('Feature 2')
plt.legend()
plt.title('Classification Dataset')
plt.show()


In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_blobs

# Four-blob synthetic clustering dataset and its scatter plot.
# make_blobs is called without random_state, so it draws from the global
# NumPy stream seeded here.
np.random.seed(42)

num_samples = 1000
num_features = 2
num_clusters = 4

X, y = make_blobs(
    n_samples=num_samples,
    n_features=num_features,
    centers=num_clusters,
)

plt.figure(figsize=(8, 6))
ax = plt.gca()

# Each cluster gets its own scatter call so it takes the next colour in the
# default cycle and its own legend entry (labels are 1-based).
for cluster in range(num_clusters):
    in_cluster = y == cluster
    ax.scatter(X[in_cluster, 0], X[in_cluster, 1], label=f'Cluster {cluster+1}')

ax.set_xlabel('Feature 1')
ax.set_ylabel('Feature 2')
ax.legend()
ax.set_title('Clustering Dataset')
plt.show()


In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

# Scrape the per-country COVID-19 table from Worldometers into a DataFrame
# and keep a fixed subset of its columns.
url = 'https://www.worldometers.info/coronavirus/'

# Without a timeout requests can block indefinitely on an unresponsive server;
# raise_for_status turns HTTP error responses into an exception here instead
# of letting an error page be parsed as if it were the data.
response = requests.get(url, timeout=30)
response.raise_for_status()

soup = BeautifulSoup(response.content, 'html.parser')

table = soup.find('table', {'id': 'main_table_countries_today'})
if table is None:
    # soup.find returns None when nothing matches; fail with a clear message
    # rather than an AttributeError on the next line.
    raise ValueError(
        "Table with id 'main_table_countries_today' was not found at " + url
    )

headers = [th.text.strip() for th in table.find_all('th')]

data = []

rows = table.find_all('tr')
for row in rows:
    row_data = [td.text.strip() for td in row.find_all('td')]
    # Rows whose cell count differs from the header count (e.g. the header
    # row itself, which has no <td> cells) are skipped.
    if len(row_data) == len(headers):
        data.append(row_data)

df = pd.DataFrame(data, columns=headers)

# NOTE(review): the live page presumably labels the country column
# 'Country,Other' rather than 'Country' - confirm against the site. The
# rename only happens when 'Country' is absent and 'Country,Other' is present.
if 'Country' not in df.columns and 'Country,Other' in df.columns:
    df = df.rename(columns={'Country,Other': 'Country'})

columns_to_keep = ['Country', 'Continent', 'Population', 'TotalCases', 'NewCases',
                   'TotalDeaths', 'NewDeaths', 'TotalRecovered', 'NewRecovered', 'ActiveCases']

# Report exactly which expected columns are missing (and what was scraped)
# instead of relying on the generic pandas KeyError.
missing_columns = [name for name in columns_to_keep if name not in df.columns]
if missing_columns:
    raise KeyError(
        f"Expected columns not found in scraped table: {missing_columns}; "
        f"available columns: {list(df.columns)}"
    )
df = df[columns_to_keep]

print(df)


In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification
from imblearn.over_sampling import SMOTE
from imblearn.under_sampling import RandomUnderSampler
from collections import Counter

# Imbalanced two-class dataset, shown before resampling, after SMOTE
# over-sampling, and after random under-sampling. None of the generators or
# samplers below is given a random_state, so they draw from the global NumPy
# stream seeded here; the order of the calls therefore determines the output.
np.random.seed(42)

num_samples = 1000
num_features = 2
num_classes = 2
num_clusters = 1


def _plot_class_scatter(features, labels, title):
    """Draw a two-class scatter plot of the first two feature columns.

    Parameters
    ----------
    features : array indexed as ``features[mask, column]``; columns 0 and 1
        are plotted on the x and y axes.
    labels : array of class labels aligned with the rows of ``features``;
        rows labelled 0 are drawn in blue, rows labelled 1 in red.
    title : str
        Title shown above the figure.
    """
    plt.figure(figsize=(8, 6))
    for class_value, class_color, class_label in (
        (0, 'blue', 'Class 0'),
        (1, 'red', 'Class 1'),
    ):
        class_mask = labels == class_value
        plt.scatter(features[class_mask, 0], features[class_mask, 1],
                    color=class_color, label=class_label)
    plt.xlabel('Feature 1')
    plt.ylabel('Feature 2')
    plt.legend()
    plt.title(title)
    plt.show()


X, y = make_classification(
    n_samples=num_samples,
    n_features=num_features,
    n_informative=num_features,
    n_redundant=0,
    n_classes=num_classes,
    n_clusters_per_class=num_clusters,
    weights=[0.95, 0.05],
)

class_counts = Counter(y)
print("Class Distribution (Original):", class_counts)

_plot_class_scatter(X, y, 'Imbalanced Classification Dataset (Original)')

# Per the imbalanced-learn docs, a float sampling_strategy is the target
# minority/majority ratio after resampling; SMOTE reaches it by adding
# minority samples.
smote = SMOTE(sampling_strategy=0.1)
X_oversampled, y_oversampled = smote.fit_resample(X, y)

oversampled_class_counts = Counter(y_oversampled)
print("Class Distribution (Oversampled):", oversampled_class_counts)

_plot_class_scatter(X_oversampled, y_oversampled,
                    'Imbalanced Classification Dataset (Oversampled)')

# Under-sampling starts again from the original X, y (not the SMOTE output)
# and reaches its ratio by dropping majority samples.
undersampler = RandomUnderSampler(sampling_strategy=0.15)
X_undersampled, y_undersampled = undersampler.fit_resample(X, y)

undersampled_class_counts = Counter(y_undersampled)
print("Class Distribution (Undersampled):", undersampled_class_counts)

_plot_class_scatter(X_undersampled, y_undersampled,
                    'Imbalanced Classification Dataset (Undersampled)')


In [None]:
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Load Iris, inspect it, hold out 30% of the rows for testing, and
# standardise the features.
iris = load_iris()
X, y = iris.data, iris.target

df = pd.DataFrame(X, columns=iris.feature_names)
print("First 5 rows of the DataFrame:")
print(df.head())

# Per-column count of missing entries.
print("Missing values in the DataFrame:")
print(df.isna().sum())

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

print("Training set dimensions:", X_train.shape)
print("Testing set dimensions:", X_test.shape)

# The scaler's statistics come from the training split only; the same fitted
# scaler is then applied to both splits.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

df_train_scaled = pd.DataFrame(X_train_scaled, columns=iris.feature_names)
print("First 5 rows of the standardized training set:")
print(df_train_scaled.head())
