In [None]:
# Consolidated sequential content from `1_Pandas-1.ipynb`
# Markdown: ## Table of Content
# 1. **[Pandas](#pandas)**
#    <img align="center" src="https://habrastorage.org/files/10c/15f/f3d/10c15ff3dcb14abdbabdac53fed6d825.jpg"  width=50% />
# 2. **[Data Structures](#structures)**
# 3. **[Pandas Series](#series)**
#    - 3.1 - [Creating a Series](#creatingS)
#    - 3.2 - [Manipulating Series](#manipulatingS)
# 4. **[Pandas Dataframes](#dataframes)**
#    - 4.1 - [Creating Dataframes](#creatingDF)
#    - Dataframe Methods
#    - 4.2 - [Manipulating Dataframes](#manipulatingDF)
# 5. **[Merge Dataframe](#reading_data)**
# 6. **[Groupby Method](#Groupby)**

# Markdown: <a id="pandas"> </a>
# Markdown: ## 1. Pandas
# Markdown: #### Introduction to Pandas

# Markdown with HTML content (commented)
# <table align="left">
#     <tr>
#         <td>
#             <div align="left", style="font-size:120%">
#                 <font color="#21618C">
#                     <b> Pandas contain data structures and data manipulation tools designed for data cleaning and analysis.
# <br><br>
#                         While Pandas adopt many coding idioms from Numpy, the biggest difference is that Pandas are designed for working with tabular or heterogeneous data. Numpy, by contrast, is best suited for working with homogeneous numerical array data.<br><br>
#                          The name Pandas is derived from the term “panel data”, an econometrics term for multidimensional structured data sets.
#                     </b>
#                 </font>
#             </div>
#         </td>
#     </tr>
# </table>

# Markdown: **How to install pandas?**
# 1. You can use-
# `!pip install pandas`
# 2. You can import it as-
# import pandas as pd

# --- Code cell: import pandas ---
import pandas as pd

# Markdown: <a id="structures"> </a>
# Markdown: ## 2. Data Structures
# Markdown: #### Introduction to Data Structures

# Markdown: Pandas has two data structures as follows:
# 1. A Series is a 1-dimensional labeled array that can hold data of any type (integer, string, boolean, float, Python objects, and so on). Its axis labels are collectively called an index.
# 2. A DataFrame is a 2-dimensional labeled data structure with columns. It supports multiple datatypes.

# Markdown: <a id="series"> </a>
# Markdown: ## 3. Pandas Series
# Markdown: #### Introduction to Pandas Series and Creating Series

# Markdown: Pandas Series is a one-dimensional labeled array capable of holding any data type.
# It will assign a labeled index to each item in the Series. By default, each item will receive an index label from 0 to N, where N is the length of the Series minus one.

# Markdown: <a id="creatingS"> </a>
# Markdown: ### 3.1 Creating a Series

# Markdown: **1. To create a numeric series**

# --- Code cell: create a numeric series ---
# Every fifth integer starting at 1 and stopping before 100.
numbers = range(1, 100, 5)
a = pd.Series(data=numbers)
# (`type(a)` can be evaluated here to inspect the object's type.)
print(a)

# Markdown: The output also gives the data type of the series as `int64`
# And note that by default, each item will receive an index label from 0 to N, where N is the length of the Series minus one.

# --- Code cell: create a Series with a list ---
# A single Series may mix integers, floats and strings.
s = pd.Series(data=[345, 'London', 34.5, -34.45, 'Happy Birthday'])
print(s)

# --- Code cell: series with marks and index ---
marks = [60, 89, 74, 86]
subject = ["Maths", "Science", "English" , "Social Science"]
# Pair every subject with its mark; the subjects become the index labels.
mrk = pd.Series(dict(zip(subject, marks)))
print(mrk)

# --- Code cell: create with dictionary ---
# Dictionary keys become the index labels, dictionary values become the data.
data = {'Maths': 60, 'Science': 89, 'English': 76, 'Social Science': 86}
print(pd.Series(data=data))

# --- Code cell: with missing values ---
subjects = ["Maths", "Science", "English","Art and Craft" , "Social Science"]
# "Art and Craft" is not a key of `data`, so its entry comes out as NaN.
marks_series = pd.Series(data).reindex(subjects)
print(marks_series)

# Markdown: <a id="manipulatingS"> </a>
# Markdown: ### 3.2 Manipulating Series

# Markdown: #### check null value

# --- Code cell: check null value ---
# Number of missing entries in the series.
print(marks_series.isna().sum())

# --- Code cell: notnull ---
# Boolean mask: True where a value is present.
print(marks_series.notna())

# Markdown: **To know the subjects in which marks score is more than 75**

# --- Code cell: filter >75 ---
print(marks_series.loc[marks_series > 75])

# Markdown: **To assign 68 marks to 'Art and Craft'**

# --- Code cell: show marks_series ---
print(marks_series)

# --- Code cell: assign and show ---
marks_series.loc["Art and Craft"] = 68
print(marks_series)

# --- Code cell: set marks < 70 to 65 ---
# Overwrite, in place, every mark that is below 70.
marks_series.loc[marks_series < 70] = 65
print(marks_series)

# --- Code cell: check math marks eq 73? ---
# Attribute-style access to a label ...
print(marks_series.Maths == 73)
# ... or the equivalent label lookup
print(marks_series.loc["Maths"] == 73)

# Markdown: **Sorting a numeric series**

# --- Code cell: create a pandas series for sorting ---
import numpy as np
# Ten numeric entries, two of which are missing (NaN).
values = pd.Series(
    [23, 45, np.nan, 41, 23, 34, 55, np.nan, 34, 20]
)
print(values)

# --- Code cell: ascending order ---
# Ascending is the default direction of sort_values().
print(values.sort_values())

# --- Code cell: descending order ---
print(values.sort_values(ascending=False))

# --- Code cell: categorical string series ---
string_values = pd.Series(data=["a", "j", "d", "f", "t", "a"])
print(string_values)

# --- Code cell: sort categorical ascending ---
print(string_values.sort_values())

# --- Code cell: sort categorical descending ---
print(string_values.sort_values(ascending=False))

# Markdown: <a id="dataframes"> </a>
# Markdown: ## 4. Pandas Dataframes
# Markdown: #### Introduction to Dataframes and Creating Dataframes

# Markdown: **1. Creating a dataframe from a dictionary**

# --- Code cell: create dataframe from dict ---
# Each key becomes a column name; each value supplies that column's entries.
data = {'Subject': ['Maths', 'History', 'Science', 'English', 'Georaphy', 'Art'],
        'Marks': (45, 65, 78,65, 80, 78),
        'CGPA': [2.5, 3.0, 3.5, 2.0, 4.0, 4.0]}

df = pd.DataFrame(data)
print(df)

# Markdown: **Note:** Like Series, the resulting Dataframe is assigned index automatically. The 'Marks' values are in a tuple.

# Markdown: **2. To create dataframe from series**

# --- Code cell: create Series variables for DF ---
Subject = pd.Series(['Maths', 'History', 'Science', 'English', 'Georaphy', 'Art'])
Marks = pd.Series([45, 65, 78, 65, 80, 78])
CGPA = pd.Series([2.5, 3.0, 3.5, 2.0, 4.0, 4.0])

# --- Code cell: DataFrame from Series list ---
# A plain list of Series is laid out row-wise: each Series becomes one ROW,
# so this prints a 3 x 6 frame. The lists example below builds the
# column-oriented table instead.
print(pd.DataFrame([Subject,Marks,CGPA]))

# Markdown: **3. To create dataframe from lists**

# --- Code cell: lists ---
Subject = ['Maths', 'History', 'Science', 'English', 'Georaphy', 'Art']
Marks = [45, 65, 78, 65, 80, 78]
CGPA = [2.5, 3.0, 3.5, 2.0, 4.0, 4.0]

# --- Code cell: DataFrame using lists ---
# Build the frame column by column. Stacking the lists as rows and then
# transposing would leave every column with dtype `object`, so 'Marks' and
# 'CGPA' would no longer be numeric columns.
d = pd.DataFrame({'Subject': Subject, 'Marks': Marks, 'CGPA': CGPA})
print(d)

# --- Code cell: show columns reorder ---
# Selecting with a list of names returns those columns in the order given.
print(d[['CGPA','Subject']])

# Markdown: ### To read data from a csv file

# --- Code cell: read csv ---
# data = pd.read_csv("Data.csv")
# data
# (Commented out actual read to avoid file dependency; uncomment in notebook environment)

# --- Code cell: show type and print examples ---
# print(type(data))
# print(data)
# data.tail()

# --- Code cell: helpful commented operations ---
# head(), data.shape, data.isnull().sum(), print(data.info()), data.describe(), data.Country.unique(), data.Country.nunique()
# type(data.Age), type(data['Age']), data[["Age","Country"]], data.Age.sum()

# Markdown: <a id="manipulatingDF"> </a>
# Markdown: ### 4.2  Manipulating Dataframes

# Markdown: ### Add new column and rows

# Markdown: CAUTION note (commented HTML)
# <table align="left"> ... CAUTION: DataFrame[column] works ... </table>

# Markdown: **Adding a new column to the data set**

# --- Code cell: add new column ---
# data["New"] = data["Age"] / data["Salary"]  # Uncomment if `data` exists

# --- Code cell: show data ---
# print(data)

# --- Code cell: add C column ---
# data['C']=[i for i in range(0,10)]
# print(data)

# --- Code cell: insert column 'D' at position 1 ---
# data.insert(1, 'D', [i for i in range(10,20)])
# print(data)

# --- Code cell: set column C to 100 ---
# data['C']=100
# print(data)

# Markdown: **Adding a new row to the data set**
# A new row can be added by assigning to a new index label with `.loc[]`; `copy()` is called first so that the original dataframe is left unchanged.

# --- Code cell: add a row example ---
# data_copy = data.copy()
# data_copy.loc[10] = [45, 85, 1.8, 26.3,23.7,44]

# --- Code cell: show data copies ---
# print(data)
# print(data_copy)

# Markdown: ### Selecting columns

# Markdown: **Indexing a dataframe using `.iloc`**
# `DataFrame.iloc[]`

# --- Code cell: iloc examples (commented) ---
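# data.iloc[2]  # select the row at position 2 (the 3rd row; positions start at 0)
# data.iloc[[4,7,9]]  # select multiple rows by position
# data.iloc[5:9]  # select the rows at positions 5 to 8 (the end position is excluded)
# data.iloc[:, 0]  # select a column by position (here the first column)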

# Markdown: **Indexing a dataframe using `.loc`**
# `DataFrame.loc[]` method is a method that takes only index labels and returns row or dataframe if the index label exists in the caller dataframe.

# --- Code cell: loc example ---
# data.loc[0:5,["Age","Salary"]]

# Markdown: ### To check for missing values and Imputation

# --- Code cell: check nulls ---
# print(data.isnull())  # .sum()

# --- Code cell: dropna example ---
# data.dropna(axis=1)

# --- Code cell: fillna example ---
# data.fillna({"Age":5,"Salary":0})
# df.ffill(axis=0)  # forward-fill missing values; `fillna(method="ffill")` is deprecated in recent pandas versions
# df.fillna(0)

# --- Code cell: column-specific fill ---
# df["A"] = df["A"].fillna(df["A"].mean())

# --- Code cell: show data ---
# print(data)

# --- Code cell: fill age with mean ---
# data['Age'] = data['Age'].fillna(data['Age'].mean())
# print(data)

# Markdown: ### Delete rows and columns

# --- Code cell: delete column example ---
# df=data.drop(columns=['D','C'])
# print(df)

# --- Code cell: delete row example ---
# df = df.drop(index=9)
# print(df)

# --- Code cell: update rows ---
# df.loc[1] = [100, 200, 300,444,78]
# print(df)

# --- Code cell: replace values ---
# df['Country'] = df['Country'].replace(100, 'Pakistan')
# print(df)

# --- Code cell: iat update ---
# df.iat[1, 2] = 399
# print(df)

# Markdown: Note about drop() returning new DF unless inplace=True.

# --- Code cell: Drop duplicated data example ---
# Twelve employee records; some rows repeat fully, others only share a name.
d = {
    'Name': [
        'Alisa', 'Bobby', 'jodha', 'jack', 'raghu', 'Cathrine',
        'Alisa', 'Bobby', 'kumar', 'Alisa', 'Alex', 'Cathrine',
    ],
    'Company': [
        'Apple', 'Walmart', 'Walmart', 'Intel', 'Apple', 'Walmart',
        'Apple', 'Cognizant', 'Apple', 'Apple', 'Cognizant', 'Walmart',
    ],
    'Salary': [
        8500, 6300, 5500, 7400, 3100, 7700,
        8500, 6300, 4200, 6200, 8900, 7700,
    ],
}
df_dup = pd.DataFrame(d, columns=['Name', 'Company', 'Salary'])
print(df_dup)

# Markdown: **duplicate rows based on all columns**

# --- Code cell: find duplicate rows ---
# duplicated() flags every repeat of a row after its first appearance.
duplicate_df = df_dup.loc[df_dup.duplicated(keep='first')]
print("Duplicate Rows except first occurrence based on all columns are :")
print(duplicate_df)

# Markdown: **Duplicate Rows Based on Selected Columns**

# Only the 'Name' column is compared here.
duplicate_df_name = df_dup.loc[df_dup.duplicated(subset='Name')]
print(duplicate_df_name)

# --- Code cell: duplicates by two columns ---
# A row counts as a repeat when both 'Name' and 'Company' match an earlier row.
duplicate_df_two = df_dup.loc[df_dup.duplicated(subset=['Name', 'Company'])]
print(duplicate_df_two)

# Markdown: **Dropping the Duplicate Rows**

# --- Code cell: drop duplicates (returns new DF) ---
# Keep only the rows that are not flagged as repeats; df_dup itself is untouched.
print(df_dup.loc[~df_dup.duplicated()])

# --- Code cell: export commented ---
# data.to_csv("new_data.csv")

# Markdown: #### .apply()

# --- Code cell: apply example ---
import pandas as pd
# Two small integer columns to demonstrate `apply`
df_apply = pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]})
# Run the doubling function on every element of column 'A' and store the
# result back under the same column name
df_apply = df_apply.assign(A=df_apply['A'].apply(lambda value: value * 2))
print(df_apply)

# Markdown: <a id="Groupby"> </a>
# Markdown: ### 5. Groupby in Pandas
# Markdown: groupby in pandas is one of the most powerful tools, it lets you split → apply → combine data.

# --- Code cell: create a dataframe for groupby example ---
# Six products; 'Cakes' and 'Beverages' each appear twice so grouping has
# something to combine.
my_df1 = pd.DataFrame(
    {
        'Product_ID': [101, 102, 103, 104, 105, 106],
        'Food_Product': ['Cakes', 'Biscuits', 'Fruit', 'Beverages', 'Cakes', 'Beverages'],
        'Brand': ['Baskin Robbins', 'Blue Riband', 'Peach', 'Horlicks', 'Mars Muffin', 'Mirinda'],
        'Sales': [5000, 8000, 7600, 5500, 6500, 9000],
        'Profit': [55000, 67000, 89000, 78000, 55000, 90000],
    }
)
print(my_df1)

# --- Code cell: view df ---
print(my_df1)

# Markdown: **total sales of each food product**

# --- Code cell: groupby sum ---
# One total per distinct 'Food_Product' value.
print(my_df1.groupby('Food_Product')['Sales'].agg('sum'))

# --- Code cell: groupby sum to frame and sort ---
# reset_index() turns the grouped Series into a two-column frame, which is
# then ordered by the summed sales.
print(
    my_df1.groupby('Food_Product')['Sales']
    .sum()
    .reset_index()
    .sort_values(by='Sales')
)

# Markdown: **Hierarchical Indices Created by Groupby**

# --- Code cell: agg example ---
# Several statistics on one column produce two-level column labels.
print(my_df1.groupby('Food_Product')[['Sales']].agg(['min', 'max', 'mean']))

# --- Code cell: flatten agg into readable columns ---
# Named aggregation: each keyword is an output column built from
# (source column, statistic); as_index=False keeps 'Food_Product' as a column.
result = my_df1.groupby('Food_Product', as_index=False).agg(
    Min_Sales=('Sales', 'min'),
    Max_Sales=('Sales', 'max'),
    Avg_Sales=('Sales', 'mean'),
)
print(result)

In [None]:
import math
import queue
import networkx as nx
import matplotlib.pyplot as plt

# Weighted adjacency list: node -> list of (neighbour, edge cost).
# The graph is treated as undirected (it is drawn with nx.Graph), so every
# edge must be listed from both of its endpoints with the same cost.
graph = {
   'A': [('B', 8), ('D', 3), ('F', 6)],
   'B': [('A', 8), ('C', 3), ('D', 2)],
   # ('D', 1) mirrors the ('C', 1) entry under 'D'; without it the searches
   # could go D -> C but never C -> D.
   'C': [('B', 3), ('D', 1), ('E', 5)],
   'D': [('A', 3), ('B', 2), ('C', 1), ('E', 8), ('G', 7)],
   'E': [('C', 5), ('D', 8), ('I', 5), ('J', 3)],
   'F': [('A', 6), ('G', 1), ('H', 7)],
   'G': [('D', 7), ('F', 1), ('I', 1)],
   'H': [('F', 7), ('I', 2)],
   'I': [('E', 5), ('G', 1), ('H', 2), ('J', 3)],
   'J': [('E', 3), ('I', 3)]
}

# (x, y) position of every node; used both for the straight-line heuristic
# and as the drawing layout.
coordinates = {
 'A': (0, 0), 'B': (2, 1), 'C': (4, 1), 'D': (1, -2), 'E': (6, 0),
 'F': (-1, -3), 'G': (2, -4), 'H': (0, -6), 'I': (4, -5), 'J': (7, -3)
}

def heuristic(n, goal):
    """Return the straight-line distance between the positions of `n` and `goal`.

    Both positions are looked up in the module-level `coordinates` mapping;
    an unknown node raises KeyError.
    """
    here, there = coordinates[n], coordinates[goal]
    dx = here[0] - there[0]
    dy = here[1] - there[1]
    return math.sqrt(dx**2 + dy**2)

def get_path(came_from, current):
    """Rebuild the route that ends at `current` by following parent links.

    Args:
        came_from: mapping of node -> the node it was reached from.
        current: the node the route ends at.

    Returns:
        The nodes from the first ancestor that has no parent entry up to
        `current`, in travel order.
    """
    route = [current]
    # Keep stepping to the parent until a node without a parent entry is reached.
    while route[-1] in came_from:
        route.append(came_from[route[-1]])
    route.reverse()
    return route

def greedy_best_first(start, goal):
    """Greedy best-first search over the module-level `graph`.

    Nodes are expanded in order of `heuristic(node, goal)` only; edge costs
    are ignored, so the returned route is not necessarily the cheapest one.

    Args:
        start: node to begin from (must be a key of `graph`).
        goal: node to reach.

    Returns:
        The list of nodes from `start` to `goal`, or None when the frontier
        empties without reaching `goal`.
    """
    frontier = queue.PriorityQueue()
    frontier.put((heuristic(start, goal), start))
    came_from = {}
    # Every node that has ever been queued. Recording a node on first
    # discovery means it is queued and expanded once, and its parent link is
    # the node that actually discovered it instead of being overwritten by
    # whichever later expansion happens to touch it.
    discovered = {start}

    while not frontier.empty():
        _, node = frontier.get()

        if node == goal:
            return get_path(came_from, node)

        for neighbor, _cost in graph[node]:
            if neighbor in discovered:
                continue
            discovered.add(neighbor)
            came_from[neighbor] = node
            frontier.put((heuristic(neighbor, goal), neighbor))
    return None

def astar(start, goal):
    """A* search over the module-level `graph`.

    Queue entries are (f, g, node), where g is the cost accumulated from
    `start` and f = g + heuristic(node, goal).

    Args:
        start: node to begin from (must be a key of `graph`).
        goal: node to reach.

    Returns:
        (path, cost): the node list from `start` to `goal` and its recorded
        cost, or (None, None) when `goal` is never reached.

    NOTE(review): the route is only guaranteed to be the cheapest one when
    `heuristic` never overestimates the remaining cost -- confirm that this
    holds for the edge weights in `graph`.
    """
    best_g = {start: 0}
    parent = {}
    frontier = queue.PriorityQueue()
    frontier.put((heuristic(start, goal), 0, start))

    while not frontier.empty():
        _f, g_here, node = frontier.get()

        if node == goal:
            return get_path(parent, node), best_g[goal]

        for neighbor, step in graph[node]:
            candidate = g_here + step
            # Only a strictly cheaper way of reaching `neighbor` is recorded.
            if candidate >= best_g.get(neighbor, float('inf')):
                continue
            best_g[neighbor] = candidate
            parent[neighbor] = node
            frontier.put((candidate + heuristic(neighbor, goal), candidate, neighbor))
    return None, None

# Run both searches between the same pair of nodes and report what they found.
greedy_path = greedy_best_first("A", "B")
astar_path, astar_cost = astar("A", "B")
print("Greedy Best First Path:", greedy_path)
print("A* Path:", astar_path)
print("A* Total Cost:", astar_cost)

# Build an undirected weighted graph for drawing from the adjacency list.
G = nx.Graph()
for node, neighbours in graph.items():
    for neighbor, cost in neighbours:
        G.add_edge(node, neighbor, weight=cost)

# Nodes are drawn at their `coordinates`, with each edge labelled by its weight.
pos = coordinates
plt.figure(figsize=(8, 6))
nx.draw(
    G,
    pos,
    with_labels=True,
    node_color='lightblue',
    node_size=800,
    font_weight='bold',
)
labels = nx.get_edge_attributes(G, 'weight')
nx.draw_networkx_edge_labels(G, pos, edge_labels=labels)

# Overlay the A* route first (wide, pink) ...
if astar_path:
    edges = list(zip(astar_path, astar_path[1:]))
    nx.draw_networkx_edges(G, pos, edgelist=edges, width=3, edge_color='pink')

# ... then the greedy route on top of it (narrower, blue).
if greedy_path:
    edges = list(zip(greedy_path, greedy_path[1:]))
    nx.draw_networkx_edges(G, pos, edgelist=edges, width=2, edge_color='blue')

plt.show()

# Print the path of the Python interpreter running this code.
import sys
print(sys.executable)

In [None]:
# Combined code from notebook (BFS & DFS for Path finding)

from collections import deque
import networkx as nx
import matplotlib.pyplot as plt

# Adjacency list of a directed graph: node -> nodes it points to.
graph = {
    'A': ['B', 'C'],
    'B': ['D', 'E'],
    'C': ['F'],
    'D': [],
    'E': ['F'],
    'F': []
}

# Draw the graph
G = nx.DiGraph()
# Register every key first so a node without any edge would still be drawn.
G.add_nodes_from(graph)
for node, neighbors in graph.items():
    for neighbor in neighbors:
        G.add_edge(node, neighbor)

plt.figure(figsize=(6, 6))
# A fixed seed keeps the force-directed layout (and so the figure) identical
# from one run to the next.
pos = nx.spring_layout(G, seed=42)
nx.draw(
    G, pos,
    with_labels=True,
    node_size=2000,
    node_color="lightblue",
    font_size=12,
    font_weight="bold",
    arrows=True,
    arrowsize=20
)
plt.title("Graph Visualization using NetworkX")
plt.show()

# BFS
def bfs(graph, start, goal=None):
    """Breadth-first traversal that prints every step.

    Args:
        graph: mapping of node -> iterable of neighbour nodes.
        start: node to begin from.
        goal: optional node to stop at. With the default None the whole
            reachable part of the graph is traversed.

    Returns:
        True if `goal` was given and reached, otherwise False (the same
        convention as `dfs`).
    """
    visited = set()
    frontier = deque([start])

    print(f"Start BFS from: {start}")
    print(f"Initial Queue: {list(frontier)}\n")

    while frontier:
        node = frontier.popleft()
        # A node can be queued by several parents before it is visited;
        # later copies are skipped here.
        if node in visited:
            continue

        print(f"Visit: {node}")
        visited.add(node)
        print(f"Visited Set: {visited}")

        # Compare against None explicitly so falsy goals such as 0 still work.
        if goal is not None and node == goal:
            print(f"Goal {goal} found!")
            return True

        frontier.extend(nb for nb in graph[node] if nb not in visited)

        print(f"Queue after exploring {node}: {list(frontier)}\n")

    if goal is not None:
        print(f"Goal {goal} not found in graph.")
    return False

# Trace BFS on the sample graph, starting at 'A' and stopping once 'F' is visited.
print("BFS Traversal with Steps:\n")
bfs(graph, 'A', 'F')

# DFS iterative
def dfs(graph, start, goal=None):
    """Iterative depth-first traversal that prints every step.

    Args:
        graph: mapping of node -> sequence of neighbour nodes.
        start: node to begin from.
        goal: optional node to stop at. With the default None the whole
            reachable part of the graph is traversed.

    Returns:
        True if `goal` was given and reached, otherwise False.
    """
    visited = set()
    stack = [start]

    print(f"Start DFS from: {start}")
    print(f"Initial Stack: {stack}\n")

    while stack:
        node = stack.pop()
        # A node can be pushed by several parents before it is visited;
        # later copies are skipped here.
        if node in visited:
            continue

        print(f"Visit: {node}")
        visited.add(node)
        print(f"Visited Set: {visited}")

        # Compare against None explicitly so falsy goals such as 0 still work.
        if goal is not None and node == goal:
            print(f"Goal {goal} found!")
            return True

        # Push in reverse so the first listed neighbour ends up on top of the
        # stack and is therefore explored first.
        stack.extend(nb for nb in reversed(graph[node]) if nb not in visited)

        print(f"Stack after exploring {node}: {stack}\n")

    if goal is not None:
        print(f"Goal {goal} not found in graph.")
    return False

# Trace DFS on the sample graph, starting at 'A' and stopping once 'F' is visited.
print("DFS Traversal with Goal:\n")
dfs(graph, 'A', goal='F')

In [None]:
# Minimax Tic-Tac-Toe combined code

# Shared game state: nine cells in row-major order, ' ' marks an empty cell.
board = [' '] * 9

def print_board(board):
    """Print the nine cells as three rows, each framed and separated by '|'."""
    for row_start in range(0, 9, 3):
        cells = board[row_start:row_start + 3]
        print("| " + " | ".join(cells) + " |")

def check_winner(board, player):
    """Return True when `player` occupies all three cells of any winning line."""
    winning_lines = (
        (0, 1, 2), (3, 4, 5), (6, 7, 8),  # rows
        (0, 3, 6), (1, 4, 7), (2, 5, 8),  # columns
        (0, 4, 8), (2, 4, 6),             # diagonals
    )
    return any(
        all(board[cell] == player for cell in line)
        for line in winning_lines
    )

def is_draw(board):
    """Return True when no cell is empty. Winners are not checked here."""
    return all(cell != ' ' for cell in board)

def available_moves(board):
    """Return the indices of all empty cells, in ascending order."""
    free_cells = []
    for index in range(len(board)):
        if board[index] == ' ':
            free_cells.append(index)
    return free_cells

def minimax(board, depth, is_maximizing):
    """Score the position on `board` by exhaustive minimax search.

    Args:
        board: the nine-cell list; moves are tried in place and undone, so the
            board is left as it was found.
        depth: recursion depth; it is passed down but does not affect the score.
        is_maximizing: True when the AI ('O') is to move, False for the
            human ('X').

    Returns:
        1 if 'O' has won or can force a win, -1 for 'X', 0 for a draw.
    """
    ai_mark, human_mark = 'O', 'X'

    # Finished positions are scored directly.
    if check_winner(board, ai_mark):
        return 1
    if check_winner(board, human_mark):
        return -1
    if is_draw(board):
        return 0

    mover = ai_mark if is_maximizing else human_mark
    outcomes = []
    for cell in available_moves(board):
        board[cell] = mover
        outcomes.append(minimax(board, depth + 1, not is_maximizing))
        board[cell] = ' '  # take the trial move back

    # The AI picks the highest outcome, the human the lowest.
    if is_maximizing:
        return max(outcomes, default=float('-inf'))
    return min(outcomes, default=float('inf'))

def best_move(board):
    """Return the empty cell whose minimax score is highest for the AI ('O').

    Each candidate is tried in place and undone. Ties go to the lowest index,
    and None is returned when the board has no empty cell.
    """
    chosen, top_score = None, float('-inf')
    for cell in available_moves(board):
        board[cell] = 'O'
        value = minimax(board, 0, False)  # the human replies next
        board[cell] = ' '
        if value > top_score:
            top_score, chosen = value, cell
    return chosen

def play_game():
    """Play one interactive game: the human is 'X' and moves first, the AI is 'O'.

    Moves are read with input() as cell numbers 0-8. The module-level `board`
    is cleared at the start, so the function can be called again for a new
    game. Non-numeric input is rejected and asked for again; EOFError from a
    closed input stream is deliberately not caught, so the loop cannot spin
    forever when no more input can arrive.
    """
    # Clear in place so every function sharing `board` sees the fresh grid.
    board[:] = [' '] * 9

    print("Welcome to Tic-Tac-Toe!")
    current_player = 'X'  # Human starts

    while True:
        print_board(board)
        if current_player == 'X':
            try:
                move = int(input("Enter your move (0-8): "))
            except ValueError:
                print("Invalid input. Enter a number 0-8.")
                continue
            if move < 0 or move > 8:
                print("Invalid move. Choose 0-8.")
                continue
            if board[move] != ' ':
                print("Spot already taken. Choose another.")
                continue

            board[move] = 'X'
            if check_winner(board, 'X'):
                print_board(board)
                print("You win!")
                break
            current_player = 'O'
        else:
            print("AI is making a move...")
            move = best_move(board)
            # best_move only returns None for a full board. The draw check at
            # the end of every turn should already have ended the game by
            # then, so this is a safety net only.
            if move is None:
                print_board(board)
                print("It's a draw!")
                break
            board[move] = 'O'
            if check_winner(board, 'O'):
                print_board(board)
                print("AI wins!")
                break
            current_player = 'X'

        if is_draw(board):
            print_board(board)
            print("It's a draw!")
            break

# Start the game (this waits for the player's moves typed at the input prompt)
play_game()

In [None]:
import matplotlib.pyplot as plt
import networkx as nx

# Adjacency of the regions to colour: region -> regions it shares a border with.
# Every border is listed from both sides.
regions = {
    'WA': ['NT', 'SA'],
    'NT': ['WA', 'Q', 'SA'],
    'Q': ['NT', 'NSW', 'SA'],
    'NSW': ['Q', 'SA', 'V'],
    'V': ['SA', 'NSW'],
    'SA': ['WA', 'NT', 'Q', 'NSW', 'V'],
    # Tasmania is an island, so it has no neighbours. The previous ['SA'] entry
    # was also one-sided, because 'SA' never listed 'T'.
    'T': []
}

# Colours are tried in this order for every region.
colors = ['red', 'green', 'blue']
# Working solution, region -> colour; filled in and undone by the search.
assignment = {}

def is_valid_assignment(region, color):
    """Return True when no already-coloured neighbour of `region` uses `color`.

    Neighbours come from the module-level `regions`, and current colours from
    the module-level `assignment`.
    """
    return not any(
        neighbor in assignment and assignment[neighbor] == color
        for neighbor in regions[region]
    )

def select_unassigned_region():
    """Return the first region, in `regions` order, that has no colour yet.

    Returns None once every region appears in `assignment`.
    """
    return next((name for name in regions if name not in assignment), None)

def backtrack():
    """Colour the remaining regions by depth-first search with backtracking.

    Works on the module-level `assignment` in place.

    Returns:
        `assignment` once every region has a colour, or None when no colour
        works for the next uncoloured region under the current partial
        colouring.
    """
    if len(assignment) == len(regions):
        return assignment

    target = select_unassigned_region()
    for color in colors:
        if not is_valid_assignment(target, color):
            continue
        assignment[target] = color
        outcome = backtrack()
        if outcome:
            return outcome
        # This colour led to a dead end further down: undo it and try the next.
        del assignment[target]
    return None

def visualize():
    """Draw the region graph with every node filled in its assigned colour.

    Regions missing from `assignment` are drawn in light grey. Node positions
    come from a seeded force-directed layout, so the picture is the same on
    every run.
    """
    G = nx.Graph()
    for region in regions:
        # add_node keeps regions without any neighbour in the picture.
        G.add_node(region)
        for neighbor in regions[region]:
            G.add_edge(region, neighbor)

    color_map = [assignment.get(region, 'lightgrey') for region in G.nodes]
    # Fixed seed: without it the layout changes from one run to the next.
    pos = nx.spring_layout(G, seed=42)
    plt.figure(figsize=(10, 6))
    nx.draw(G, pos, with_labels=True, node_color=color_map, node_size=2000, font_size=16, font_color='white')
    plt.title("Australia Map Coloring Problem", fontsize=20)
    plt.show()

# Run the search; `solution` is the filled-in `assignment` dict, or None on failure.
solution = backtrack()

if solution:
    # List the colour chosen for every region, then draw the coloured map.
    print("Color assignments for the regions:")
    for region, color in solution.items():
        print(f"{region}: {color}")
    visualize()
else:
    print("No solution exists.")