Task 1: Intermediate Python I1. Modular Code:
   - Write Python functions to modularize your code for data cleaning, feature engineering, and model training.

In [16]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score


def load_data():
    try:
        data=pd.read_csv('train.csv')  
        return data
    except Exception as e:
        print("error loading data: {e}")
        return None


def clean_data(data):
    try:
        data=data.dropna()  # Drop rows with missing values
        return data
    except Exception as e:
        print("error cleaning data: {e}")
        return 0

#function to prepare features and target
def prepare_data(data):
    try:
        X = data.drop('target', axis=1)  # Features
        y = data['target']  # Target
        return X, y
    except KeyError as e:
        print(f"Error preparing data: {e}")
        return None, None

# Function to train the model
def train_model(X_train, y_train):
    try:
        model=RandomForestClassifier()
        model.fit(X_train, y_train)
        return model
    except Exception as e:
        print(f"Error training model: {e}")
        return None


def evaluate_model(model, X_test, y_test):
    try:
        y_pred = model.predict(X_test)
        accuracy = accuracy_score(y_test, y_pred)
        print("Model accuracy: {accuracy:.2f}")
    except Exception as e:
        print("error evaluating model: {e}")


if __name__ == "__main__":
    data = load_data()
    if data is not None:
        data = clean_data(data)
        if data is not None:
            X, y = prepare_data(data)
            if X is not None and y is not None:
                X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
                model = train_model(X_train, y_train)
                if model is not None:
                    evaluate_model(model, X_test, y_test)


Error preparing data: "['target'] not found in axis"


2. List Comprehensions and Lambda Functions:

In [17]:
numbers=[9,3,5,6,78,4]
squared_numbers = [x**2 for x in numbers]
print("Squared numbers: {squared_numbers}")
#lambda function to filter out even numbers
even_numbers = list(filter(lambda x: x % 2 == 0, numbers))
print("Even numbers: {even_numbers}")


Squared numbers: {squared_numbers}
Even numbers: {even_numbers}


Intermediate Python II1. Advanced Data Structures

Sets

In [26]:
numbers=[3,1,98,54,45.87]

#convert list to set to get unique elements
unique_numbers = set(numbers)
print("Unique numbers: ",unique_numbers)

#set operations
set1={1, 2, 3, 4}
set2={3, 4, 5, 6}

# Union of sets
union_set=set1 |set2
print("Union:",union_set)

# Intersection of sets
intersection_set=set1&set2
print(f"Intersection:",intersection_set)

#Difference of sets
difference_set=set1 - set2
print("Difference: ",difference_set)


Unique numbers:  {1, 98, 3, 45.87, 54}
Union: {1, 2, 3, 4, 5, 6}
Intersection: {3, 4}
Difference:  {1, 2}


Dictionary

In [28]:
words=['apple','banana','apple', 'orange', 'banana', 'banana']
#counting occurrences using a dictionary
word_count = {}
for word in words:
    if word in word_count:
        word_count[word] += 1
    else:
        word_count[word] = 1
print("Word counts",word_count)

#dictionary operations
data = {'name': 'Alice', 'age': 25, 'city': 'Wonderland'}
#name is key and 25 is value allover known as keyvalues
#accessing values
print("Name:",data['name'])
print("Age:",data['age'])

#adding a new key-value pair
data['occupation'] = 'Adventurer'
print("Updated data: ",data)

#removing a key-value pair
del data['city']
print("Data after deletion:",data)


Word counts {'apple': 2, 'banana': 3, 'orange': 1}
Name: Alice
Age: 25
Updated data:  {'name': 'Alice', 'age': 25, 'city': 'Wonderland', 'occupation': 'Adventurer'}
Data after deletion: {'name': 'Alice', 'age': 25, 'occupation': 'Adventurer'}


Tuples

In [20]:
#creating a tuple
my_tuple=(1, 2, 3, 'apple','cake')

#accessing elements by index
print("First element: {my_tuple[0]}")
print("Last element: {my_tuple[-1]}")

#slicing tuples
print("Elements from index 1 to 3: {my_tuple[1:4]}")

#nested tuple
nested_tuple=(1, (2, 3), (4, (5, 6)))

#accessing elements in a nested tuple
print(f"First element of nested tuple: {nested_tuple[0]}")
print(f"Second element of nested tuple: {nested_tuple[1]}")
print(f"Element within a nested tuple: {nested_tuple[2][1][0]}")


First element: {my_tuple[0]}
Last element: {my_tuple[-1]}
Elements from index 1 to 3: {my_tuple[1:4]}
First element of nested tuple: 1
Second element of nested tuple: (2, 3)
Element within a nested tuple: 5


Lits in python

In [31]:
my_list=[1, 2, 3, 'apple', 'banana']
#Adding an element to the end of the list
my_list.append('orange')
print("List after append:",my_list)
last_item=my_list.pop()
print("Removed item: ",last_item)
print("List after pop:",my_list)
#extending the list with another list
my_list.extend(['grape', 'kiwi'])
print("List after extend:",my_list)

print("Original list:",my_list)
# Sorting the list will on all integers or all strings
numbers=[4, 2, 9, 1, 5]
numbers.sort()
print("Sorted list: ",numbers)


List after append: [1, 2, 3, 'apple', 'banana', 'orange']
Removed item:  orange
List after pop: [1, 2, 3, 'apple', 'banana']
List after extend: [1, 2, 3, 'apple', 'banana', 'grape', 'kiwi']
Original list: [1, 2, 3, 'apple', 'banana', 'grape', 'kiwi']
Sorted list:  [1, 2, 4, 5, 9]


Example: Using multiprocessing Module

In [22]:
import multiprocessing
import time


def print_numbers():
    for i in range(5):
        time.sleep(1)  # Simulating a time-consuming task
        print(f"Number: {i}")

# Create processes
process1=multiprocessing.Process(target=print_numbers)
process2=multiprocessing.Process(target=print_numbers)

#start processes
process1.start()
process2.start()

#wait for processes to complete
process1.join()
process2.join()

print("Both processes have finished execution.")


Both processes have finished execution.


In [23]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

class BasicMLPipeline:
    def __init__(self, data, target_column):
        self.data=data
        self.target_column=target_column
        self.model=None
        self.X_train=None
        self.X_test=None
        self.y_train=None
        self.y_test=None

    def clean_data(self):
        #Drop rows with missing values
        self.data=self.data.dropna()
        print("Data cleaned.")

    def prepare_data(self):
        #Aimining variables & preparing different features
        X=self.data.drop(self.target_column, axis=1)
        y=self.data[self.target_column]
        return X, y

    def split_data(self):
        #Divide the given data into sets of training and test
        X, y=self.prepare_data()
        self.X_train, self.X_test, self.y_train, self.y_test=train_test_split(X, y, test_size=0.2, random_state=42)
        print("Data split into training and test sets.")

    def scale_features(self):
        #Standardizing various features
        scaler=StandardScaler()
        self.X_train=scaler.fit_transform(self.X_train)
        self.X_test=scaler.transform(self.X_test)
        print("Features are scaled.")

    def train_model(self):
        #Training model of RandomForestClassifier
        self.model=RandomForestClassifier()
        self.model.fit(self.X_train, self.y_train)
        print("Model is now trained.")

    def evaluate_model(self):
        #Evaluating this model on the give testing set
        if self.model is None:
            raise RuntimeError("Unfortunately, model is not trained,Now Call the 'train_model' first")
        y_pred=self.model.predict(self.X_test)
        accuracy=accuracy_score(self.y_test, y_pred)
        print("Model's accuracy: {accuracy:.2f}")

    def run_pipeline(self):
        #Now running the whole pipeline
        self.clean_data()
        self.split_data()
        self.scale_features()
        self.train_model()
        self.evaluate_model()

        #Example usage
if __name__=="___main___":
    
    data=pd.read_csv('test.csv')

    #Initializing and running the pipeline
    pipeline=BasicMLPipeline(data, target_column='target')
    pipeline.run_pipeline()
