In [None]:
# What is a parameter?

Parameters are internal variables in a machine learning model that are learned from the training data. For example, in linear regression, the coefficients (weights) and intercept are parameters.

from sklearn.linear_model import LinearRegression
import numpy as np

# Toy dataset: four samples with two features each, and one target per sample.
# Note: the second feature is always the first plus one, so the two columns
# are perfectly collinear and the split of weight between them is not unique.
X = np.array([
    [1, 2],
    [2, 3],
    [3, 4],
    [4, 5],
])
y = np.array([2, 3, 4, 5])

# Build the estimator and train it in one chained call; fit() hands back the
# fitted estimator, so `model` ends up referring to the trained model.
model = LinearRegression().fit(X, y)

# The learned parameters are exposed as attributes on the fitted model:
# coef_ holds one weight per feature, intercept_ holds the bias term.
print("Coefficients:", model.coef_)
print("Intercept:", model.intercept_)


In [None]:
# What is correlation?

Correlation is a statistical measure that describes the extent to which two variables move in relation to each other. It is usually expressed as a number between -1 and 1. A correlation of 1 means they move in the same direction perfectly, -1 means they move in opposite directions perfectly, and 0 means there is no linear relationship between them.  

Negative correlation means that as one variable increases, the other decreases. For example, as the number of hours worked increases, leisure time decreases.

import pandas as pd

# Sample data: y falls as x rises
data = {
    'x': [1, 2, 3, 4, 5],
    'y': [10, 9, 6, 5, 2],
}
df = pd.DataFrame.from_dict(data)

# Pairwise Pearson correlation matrix of the numeric columns
correlation = df.corr(method='pearson')
print(correlation)


import seaborn as sns
import matplotlib.pyplot as plt

# Sample data with negative correlation
data = {'x': [1, 2, 3, 4, 5], 'y': [10, 9, 7, 5, 2]}
df = pd.DataFrame(data)

# Plot negative correlation on an explicit Axes so the figure can carry a
# title and axis labels and still make sense when the notebook is skimmed.
fig, ax = plt.subplots()
sns.scatterplot(x='x', y='y', data=df, ax=ax)
ax.set(title='Negative correlation: y decreases as x increases',
       xlabel='x', ylabel='y')
plt.show()


In [None]:
# Define Machine Learning. What are the main components in Machine Learning?

Machine Learning is a subset of artificial intelligence that enables a system to automatically learn from data and improve performance over time without being explicitly programmed.  

Data: The raw material for machine learning models.

Model: A mathematical representation of a real-world process.

Training: The process of learning the model parameters from data.

Evaluation: Assessing the model's performance.

Prediction: Using the model to make decisions or predictions.


In [None]:
# How does loss value help in determining whether the model is good or not?

The loss value measures how far the predictions of the model are from the actual values. A lower loss indicates better model performance.

Example (loss value in regression, using Mean Squared Error):

from sklearn.metrics import mean_squared_error

# Ground-truth targets and the model's predictions for the same four samples
y_true = [2, 3, 4, 5]
y_pred = [2.1, 2.9, 4.2, 4.8]

# Mean Squared Error (MSE) serves as the loss value: the average of the
# squared differences between each actual and predicted value.
mse = mean_squared_error(y_true=y_true, y_pred=y_pred)
print("Mean Squared Error:", mse)

In [1]:
# What are continuous and categorical variables?

Continuous Variables: Numeric variables that can take any value within a range, so there are infinitely many possible values, e.g., height, weight.
Categorical Variables: Variables that take one of a limited set of categories or labels, e.g., gender, country.

In [None]:
# How do we handle categorical variables in Machine Learning? What are the common techniques?

Categorical variables are converted into numerical form using techniques like one-hot encoding or label encoding.

Example (one-hot encoding in Python using scikit-learn):


from sklearn.preprocessing import OneHotEncoder
import numpy as np

# Sample categorical data, shaped as a single column (one row per sample)
categories = np.array(['Male', 'Female', 'Female', 'Male']).reshape(-1, 1)

# One-Hot Encoding: learn the distinct categories first, then encode the
# column and convert the result to a dense array for printing.
encoder = OneHotEncoder()
encoder.fit(categories)
encoded = encoder.transform(categories).toarray()
print(encoded)

In [None]:
# What do you mean by training and testing a dataset?

Training a dataset means using it to teach the model by adjusting its parameters. Testing a dataset means evaluating the model's performance on unseen data.

# Import necessary libraries
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
import numpy as np

# Sample data (X = features, y = target); each target is its feature plus one
X = np.arange(1, 11).reshape(-1, 1)  # Feature values 1..10 as a single column
y = np.arange(2, 12)  # Target values 2..11

# Step 1: Hold out 20% of the rows for testing; the fixed random_state makes
# the shuffle reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Steps 2-3: Create the Linear Regression model and train it on the training split
model = LinearRegression()
model.fit(X_train, y_train)

# Steps 4-5: Predict on the held-out rows and score them with Mean Squared Error
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)

# Display results
print("Test Set Predictions:", y_pred)
print("Mean Squared Error:", mse)


In [None]:
# What is sklearn.preprocessing?

sklearn.preprocessing is a module in Scikit-learn used for data preprocessing tasks like scaling, normalization, encoding, and more.

In [None]:
# What is a Test set?

 A test set is a portion of the dataset that is used to evaluate the performance of the machine learning model after it has been trained.


In [None]:
# How do we split data for model fitting (training and testing) in Python?
#  How do you approach a Machine Learning problem?

from sklearn.model_selection import train_test_split
import numpy as np

# Sample data: four rows with two features each, plus one target per row
X = np.array([
    [1, 2],
    [2, 3],
    [3, 4],
    [4, 5],
])
y = np.array([2, 3, 4, 5])

# Shuffle and split with a fixed seed; test_size=0.2 requests that 20% of the
# rows be held out for testing and the rest used for training.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.2
)

print("Training Set:", X_train, y_train)
print("Test Set:", X_test, y_test)

Steps for approaching a Machine Learning problem:

1. Define the problem.
2. Collect and preprocess data.
3. Perform Exploratory Data Analysis (EDA).
4. Split the data into training and testing sets.
5. Choose and train a model.
6. Evaluate the model.
7. Optimize the model.


In [None]:
# Why do we have to perform EDA before fitting a model to the data?

EDA helps to understand the data distribution, identify patterns, and detect any anomalies or missing values before building a machine learning model.

In [None]:
# What is correlation?

Correlation measures the relationship between two variables and how they move in relation to each other. It ranges from -1 to 1, where 1 indicates perfect positive correlation, -1 indicates perfect negative correlation, and 0 means no linear correlation.  



In [None]:
# What does negative correlation mean?

Negative correlation means that as one variable increases, the other variable tends to decrease, and vice versa. For example, if temperature decreases, heating bills increase.

In [None]:
# How can you find correlation between variables in Python?

import pandas as pd

# Sample data, built directly as a two-column frame
data = dict(x=[1, 2, 3, 4, 5], y=[10, 9, 6, 5, 2])
df = pd.DataFrame(data, columns=['x', 'y'])

# DataFrame.corr() returns the pairwise correlation matrix (Pearson by default)
correlation = df.corr()
print(correlation)


In [None]:
# What is causation? Explain the difference between correlation and causation with an example.

Correlation: Measures the relationship between two variables.

Causation: Indicates that one event is the result of the occurrence of another event.

Example: Correlation may show that ice cream sales and drowning incidents increase in summer, but ice cream doesn't cause drowning; the hot weather does.

In [None]:
# What is an Optimizer? What are different types of optimizers? Explain each with an example.

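An optimizer in machine learning adjusts the model's parameters (like weights) to minimize the loss function. Examples of optimizers include:

SGD (Stochastic Gradient Descent): updates the parameters using the gradient computed on a single sample or a small mini-batch, scaled by a learning rate.
Adam (Adaptive Moment Estimation): keeps running averages of the gradients and of their squares, and uses them to adapt the step size for each parameter.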

import tensorflow as tf

# Sample model, assembled layer by layer: a 10-unit ReLU layer followed by a
# single-unit output layer.
model = tf.keras.Sequential()
model.add(tf.keras.layers.Dense(10, activation='relu'))
model.add(tf.keras.layers.Dense(1))

# Compile model with Adam optimizer; mean squared error is the loss the
# optimizer will minimise during training.
model.compile(loss='mean_squared_error', optimizer='adam')


In [None]:
# What is sklearn.linear_model ?

sklearn.linear_model is a Scikit-learn module for implementing linear models such as Linear Regression, Logistic Regression, Ridge, Lasso, etc.

In [None]:
# What does model.fit() do? What arguments must be given?

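model.fit() trains the machine learning model by adjusting its parameters using the provided data. For a supervised model it must be given the training features X and the corresponding target values y: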

 model.fit(X_train, y_train)


In [None]:
# What does model.predict() do? What arguments must be given?

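model.predict() uses a model that has already been trained with model.fit() to generate predictions for input data. It applies the learned parameters to the given features and returns one predicted value per sample. A model must be fitted before predict() can be called, as in the example below.

Arguments:

X (Features/Independent variables) – the samples to predict for, with the same number of features that was used during training
Unlike model.fit(), no y (Target/Dependent variable) is passed, because the target is what the model predicts.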

from sklearn.linear_model import LinearRegression

# Training data: each target is twice its feature value.
X_train = [[1], [2], [3], [4]]
y_train = [2, 4, 6, 8]

model = LinearRegression()
model.fit(X_train, y_train)  # Train the model; predict() requires a fitted model

# predict() takes only the features X (2-D: one row per sample, same number of
# columns as the training data) and returns one predicted target per row.
X_new = [[5], [6]]
y_new_pred = model.predict(X_new)
print("Predictions for", X_new, ":", y_new_pred)


In [None]:
# What are continuous and categorical variables?

Continuous Variables: Numeric variables that can take any value within a range, giving infinitely many possible values (e.g., height, weight).
Categorical Variables: Variables that represent categories or groups (e.g., gender, country).

In [None]:
# What is feature scaling? How does it help in Machine Learning?

Feature scaling is a technique to normalize or standardize the range of independent variables/features. It helps by ensuring that features with larger scales do not dominate others and allows gradient-based algorithms to converge faster.


In [None]:
# How do we perform scaling in Python?

from sklearn.preprocessing import StandardScaler

# Sample data: four rows, two feature columns
X = [[1, 2], [2, 3], [3, 4], [4, 5]]

# Scaling the data in two explicit steps: fit() learns each column's mean and
# standard deviation, transform() then standardises the values with them.
scaler = StandardScaler().fit(X)
X_scaled = scaler.transform(X)
print(X_scaled)


In [None]:
# What is sklearn.preprocessing?

sklearn.preprocessing is a module in Scikit-learn used for transforming and normalizing data, including scaling, encoding, and imputing missing values.

In [None]:
# How do we split data for model fitting (training and testing) in Python?

from sklearn.model_selection import train_test_split

# Sample data: six single-feature rows, each target being twice its feature
X = [[value] for value in range(1, 7)]
y = [2 * value for value in range(1, 7)]

# Splitting data into training and testing sets; random_state pins the shuffle
# so the same rows land in each split on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.2
)

print("Training data:", X_train)
print("Test data:", X_test)


In [None]:
# Explain data encoding?

Data encoding converts categorical variables into numerical values so they can be used in machine learning models. Common techniques include:

Label Encoding: Converts each category into a numerical label.
One-Hot Encoding: Creates binary columns for each category.

from sklearn.preprocessing import OneHotEncoder
import numpy as np

# Sample categorical data: one column, one row per observation
categories = np.array([
    ['Male'],
    ['Female'],
    ['Female'],
    ['Male'],
])

# One-Hot Encoding: fit the encoder on the column, then produce the encoded
# matrix and densify it so it prints as a plain array.
encoder = OneHotEncoder()
encoder.fit(categories)
encoded_sparse = encoder.transform(categories)
encoded = encoded_sparse.toarray()
print(encoded)
