In [27]:
# Regression Examples

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

# Toy housing data: five listings with three predictors and the sale price
data = dict(
    Size=[1500, 1600, 1700, 1800, 1900],
    Bedrooms=[3, 3, 4, 4, 5],
    Age=[10, 15, 20, 25, 30],
    Price=[300000, 320000, 340000, 360000, 380000],
)

# Tabular view of the listings
df = pd.DataFrame(data)

# Predictors are every column except the target; the target is the price
X = df.drop(columns=['Price'])
y = df['Price']

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit a linear regression on the training rows (fit() returns the estimator itself)
model = LinearRegression().fit(X_train, y_train)

# Predict prices for the held-out rows
y_pred = model.predict(X_test)

# Mean squared error between the actual and predicted held-out prices
mse = mean_squared_error(y_test, y_pred)
print(f"Mean Squared Error: {mse}")

# Fitted parameters: one coefficient per feature column, then the intercept
print(f"Coefficients: {model.coef_}")
print(f"Intercept: {model.intercept_}")


Mean Squared Error: 3.3881317890172014e-21
Coefficients: [1.99501247e+02 1.42108547e-14 9.97506234e+00]
Intercept: 648.3790523692151


In [25]:
# Classification example
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix

# Toy e-mail dataset: three message features and a binary spam label.
# NOTE: in this sample 'Contains Free' has exactly the same values as 'Spam',
# so the data is only suitable for demonstrating the API, not for judging a model.
data = {
    'Number of Words': [100, 50, 200, 300, 150],
    'Number of Links': [3, 1, 5, 10, 2],
    'Contains Free': [1, 0, 1, 1, 0],  # 1 for Yes, 0 for No
    'Spam': [1, 0, 1, 1, 0]
}

# Create DataFrame
df = pd.DataFrame(data)

# Features and target variable
X = df[['Number of Words', 'Number of Links', 'Contains Free']]
y = df['Spam']

# Split the dataset into training and testing sets (fixed seed for a repeatable split)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create and train the model
model = LogisticRegression()
model.fit(X_train, y_train)

# Make predictions on the held-out rows
y_pred = model.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
# Pin both classes explicitly. Without `labels`, the matrix only covers classes
# that occur in y_test or y_pred, so with a tiny test split its shape would
# depend on the outcome (1x1 when the prediction matches the true label).
conf_matrix = confusion_matrix(y_test, y_pred, labels=[0, 1])

print(f"Accuracy: {accuracy}")
print("Confusion Matrix:")
print(conf_matrix)


Accuracy: 0.0
Confusion Matrix:
[[0 1]
 [0 0]]


In [28]:
import numpy as np

# A generator built with a fixed seed produces the same numbers on every run
rnd = np.random.RandomState(123)

# Draw a 3 x 5 array of uniform samples between 0.0 and 1.0
X = rnd.uniform(0.0, 1.0, (3, 5))
print(X)

print(40 * '=')

# First generator created without a seed
rnd1 = np.random.RandomState()
random_numbers1 = rnd1.uniform(0.0, 1.0, 5)
print(random_numbers1)

print(40 * '=')

# Second generator, also created without a seed
rnd2 = np.random.RandomState()
random_numbers2 = rnd2.uniform(0.0, 1.0, 5)
print(random_numbers2)


[[0.69646919 0.28613933 0.22685145 0.55131477 0.71946897]
 [0.42310646 0.9807642  0.68482974 0.4809319  0.39211752]
 [0.34317802 0.72904971 0.43857224 0.0596779  0.39804426]]
[0.78288923 0.54833822 0.01058645 0.67631547 0.67534456]
[0.42759106 0.57192585 0.25618607 0.48483994 0.29216425]


In [26]:
import numpy as np
# Five evenly spaced points from 0 to 12, both endpoints included
y = np.linspace(0, 12, num=5)
print(y)

# Adding a trailing axis turns the shape-(5,) array into a (5, 1) column vector
y_column = y[:, np.newaxis]
print(y_column)

[ 0.  3.  6.  9. 12.]
[[ 0.]
 [ 3.]
 [ 6.]
 [ 9.]
 [12.]]


In [30]:
import numpy as np
# Inspecting an array's shape and rearranging it into a different shape
# Reproducible 3 x 5 array of uniform samples (seeded generator)
rnd = np.random.RandomState(123)
X = rnd.uniform(0.0, 1.0, (3, 5))
print(X.shape)
# Same 15 values laid out as 5 rows of 3
print(np.reshape(X, (5, 3)))

(3, 5)
[[0.69646919 0.28613933 0.22685145]
 [0.55131477 0.71946897 0.42310646]
 [0.9807642  0.68482974 0.4809319 ]
 [0.39211752 0.34317802 0.72904971]
 [0.43857224 0.0596779  0.39804426]]


In [29]:
# Indexing by an array of integers (fancy indexing)
# NOTE: this cell relies on `np` and the 3 x 5 array `X` created in another cell.
indices = np.array([3, 1, 0])
print(indices)
# Keep every row, but pick columns 3, 1 and 0 in that order; as the last
# expression of the cell, the resulting 3 x 3 array is displayed as the output.
X[:, indices]

[3 1 0]


array([[0.55131477, 0.28613933, 0.69646919],
       [0.4809319 , 0.9807642 , 0.42310646],
       [0.0596779 , 0.72904971, 0.34317802]])

In [None]:
# Placeholder text used to demonstrate string slicing
a = 'lorem10orem 10orem 10orem 10orem 10orem 10orem 10orem 10orem 10orem 10orem'
# Characters at positions 0 through 19 (the first twenty characters)
print(a[0:20])

lorem10orem 10orem 1


In [31]:
import pandas as pd

# Small people table used by the access examples below
data = dict(
    Name=['Alice', 'Bob', 'Carol'],
    Age=[30, 25, 28],
    Country=['USA', 'Canada', 'UK'],
)
df = pd.DataFrame(data)

# --- Columns ---
# A single column label gives back that column as a Series
print(df['Name'])

print(40 * '=')

# A list of labels gives back a DataFrame holding just those columns
print(df[['Name', 'Age']])

print(40 * '=')

# --- Rows by position ---
# Position 0 is the first row (Alice)
print(df.iloc[0])

print(40 * '=')

# Positions 1 and 2 (the end of the slice is excluded): Bob and Carol
print(df.iloc[1:3])

print(40 * '=')

# --- Single values ---
# .at reads one cell by row label and column label -> 'Alice'
print(df.at[0, 'Name'])

print(40 * '=')

# .loc with a row label and a column label -> 25 (Bob's age)
print(df.loc[1, 'Age'])


0    Alice
1      Bob
2    Carol
Name: Name, dtype: object
    Name  Age
0  Alice   30
1    Bob   25
2  Carol   28
Name       Alice
Age           30
Country      USA
Name: 0, dtype: object
    Name  Age Country
1    Bob   25  Canada
2  Carol   28      UK
Alice
25


In [44]:
# NOTE: this cell relies on the DataFrame `df` created in an earlier cell.
print(df[0:0])  # Plain [] with an integer slice only slices rows; 0:0 is empty, so only the column headers remain
print('='*40)
"""
df.loc is used for label-based indexing. It allows you to access rows and columns by their labels (names).
"""

print(df.loc[0])  # Prints the row labelled 0 (Alice's information)

print('='*40)

# A .loc slice includes its end label, so 0:1 returns the rows labelled 0 and 1
print(df.loc[0:1]) # Prints the first and second rows (Alice's and Bob's information)

Empty DataFrame
Columns: [Name, Age, Country]
Index: []
Name       Alice
Age           30
Country      USA
Name: 0, dtype: object
    Name  Age Country
0  Alice   30     USA
1    Bob   25  Canada
