In [None]:
%pip install opencv-python matplotlib numpy seaborn pillow




In [1]:
import os
import plotly.express as px
import pandas as pd

In [14]:
# Path to train folder
train_path = "train"

# Get all image filenames in the train folder
train_images = set(os.listdir(train_path))
df = pd.read_csv("train\_classes.csv")
# Check how many CSV filenames exist in the train folder
df["exists"] = df["filename"].apply(lambda x: 1 if x in train_images else 0)
df.head()

Unnamed: 0,filename,door,window,zone,exists
0,876_png.rf.db0d0f0666dfaef76a1eae40c56b1c8e.jpg,1,1,1,1
1,682_png.rf.e1609bf15ca2446b64ec86e66c4441ae.jpg,1,1,1,1
2,1023_png.rf.dc5273b1381c49c31feacf585f1ffe34.jpg,1,1,1,1
3,931_png.rf.dfe563ba31bb80b6cf77e897e7083191.jpg,1,1,1,1
4,746_png.rf.dad518aac0115f30652efc7e92a213b3.jpg,1,1,1,1


In [3]:

# Count missing images
missing_images = df[df["exists"] == 0]

In [4]:
# Plot the distribution of doors and windows
fig1 = px.histogram(df, x=["door", "window"], title="Distribution of Doors & Windows",
                    labels={"value": "Count", "variable": "Feature"}, opacity=0.7)
fig1.show()

In [5]:
import os
import cv2
import numpy as np

def count_rooms(image_path):
    """Counts the number of rooms (partitions) in a floor plan and extracts its dimensions."""

    # Load image in grayscale
    img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if img is None:
        return 0, 0, 0  # Return default if image loading fails

    # Apply adaptive thresholding for better border detection
    thresh = cv2.adaptiveThreshold(img, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                   cv2.THRESH_BINARY_INV, 11, 2)

    # Find contours in the thresholded image
    contours, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

    if not contours:
        return 0, 0, 0  # No contours found, return default values

    # Get the largest contour (assuming it's the main floor plan boundary)
    largest_contour = max(contours, key=cv2.contourArea)
    x, y, w, h = cv2.boundingRect(largest_contour)  # Get the bounding box of the floor plan

    # Set dynamic area thresholds based on floor plan size
    min_room_area = 0.001 * (w * h)  # Minimum valid room size (adjustable)
    max_room_area = 0.8 * (w * h)    # Ignore rooms covering too much area (likely not rooms)

    # Filter valid room contours based on size
    room_contours = [c for c in contours if min_room_area < cv2.contourArea(c) < max_room_area]

    return len(room_contours)  # Return room count + floor dimensions


In [6]:
import cv2
import numpy as np

def get_floor_plan_dimensions(image_path):
    """
    Extracts the height and width of the bordered (colored) part of a floor plan.

    Parameters:
        image_path (str): Path to the floor plan image.

    Returns:
        (int, int): (height, width) of the detected floor plan region.
    """
    img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if img is None:
        return 0, 0  # Invalid image

    # Apply threshold to detect edges of the floor plan
    _, thresh = cv2.threshold(img, 200, 255, cv2.THRESH_BINARY_INV)

    # Find contours
    contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    # Get the bounding box of the largest detected contour (assumed to be the floor plan)
    if contours:
        largest_contour = max(contours, key=cv2.contourArea)
        x, y, w, h = cv2.boundingRect(largest_contour)
        return h, w  # Return height and width of the floor plan

    return 0, 0  # No valid contour found


In [7]:
# Add room count to DataFrame
df["room_count"] = df["filename"].apply(lambda x: count_rooms(os.path.join("/content/drive/MyDrive/train", x)))

# dim = df["filename"].apply(lambda x: pd.Series(get_floor_plan_dimensions(os.path.join("/content/drive/MyDrive/train", x))))




In [8]:
df.head()

Unnamed: 0,filename,door,window,zone,exists,room_count
0,876_png.rf.db0d0f0666dfaef76a1eae40c56b1c8e.jpg,1,1,1,1,"(0, 0, 0)"
1,682_png.rf.e1609bf15ca2446b64ec86e66c4441ae.jpg,1,1,1,1,"(0, 0, 0)"
2,1023_png.rf.dc5273b1381c49c31feacf585f1ffe34.jpg,1,1,1,1,"(0, 0, 0)"
3,931_png.rf.dfe563ba31bb80b6cf77e897e7083191.jpg,1,1,1,1,"(0, 0, 0)"
4,746_png.rf.dad518aac0115f30652efc7e92a213b3.jpg,1,1,1,1,"(0, 0, 0)"


In [9]:
import plotly.express as px
import pandas as pd



# Count occurrences of each room count
room_counts = df["room_count"].value_counts().reset_index()
room_counts.columns = ["room_count", "count"]  # Rename columns properly

# Create bar chart
fig = px.bar(room_counts,
             x="room_count", y="count",
             title="Distribution of Number of Rooms in Floor Plans",
             labels={"room_count": "Number of Rooms", "count": "Number of Floor Plans"},
             text_auto=True,
             color="count",
             color_continuous_scale="blues")

# Show the plot
fig.show()


In [10]:
import plotly.express as px
import plotly.subplots as sp
import pandas as pd

# Ensure no missing data
df = df.dropna(subset=["room_count", "height", "width"])

# Create subplots
fig = sp.make_subplots(rows=1, cols=2, subplot_titles=("Rooms vs Width", "Rooms vs Height"))

# Scatter plot: Rooms vs Width
scatter1 = px.scatter(df, x="room_count", y="width", color="room_count",
                      labels={"room_count": "Number of Rooms", "width": "Width"},
                      color_continuous_scale="viridis")
for trace in scatter1.data:
    fig.add_trace(trace, row=1, col=1)

# Scatter plot: Rooms vs Height
scatter2 = px.scatter(df, x="room_count", y="height", color="room_count",
                      labels={"room_count": "Number of Rooms", "height": "Height"},
                      color_continuous_scale="viridis")
for trace in scatter2.data:
    fig.add_trace(trace, row=1, col=2)

# Update layout
fig.update_layout(title="Relation Between Number of Rooms & Floor Plan Dimensions",
                  showlegend=False)

# Show the plot
fig.show()


KeyError: ['height', 'width']

In [13]:

# Create a 3D scatter plot
fig = px.scatter_3d(df,
                     x="room_count",
                     y="width",
                     z="height",
                     color="room_count",
                     size="room_count",
                     title="3D Relationship: Number of Rooms vs. Floor Plan Dimensions",
                     labels={"room_count": "Number of Rooms", "width": "Width", "height": "Height"},
                     opacity=0.8,
                     color_continuous_scale="viridis")

# Show the plot
fig.show()

ValueError: Value of 'y' is not the name of a column in 'data_frame'. Expected one of ['filename', ' door', ' window', ' zone', 'exists', 'room_count'] but received: width

In [11]:
import plotly.express as px
import pandas as pd

# Ensure no missing data and sort by number of rooms
df = df.dropna(subset=["room_count", "height", "width"]).sort_values(by="room_count")

# Create a new DataFrame for line plotting
line_df = df.groupby("room_count")[["width", "height"]].mean().reset_index()

# Create the line plot
fig = px.line(line_df, x="room_count", y=["width", "height"],
              markers=True,  # Adds dots at each data point
              labels={"room_count": "Number of Rooms", "value": "Dimension (px)", "variable": "Metric"},
              title="Trend of Floor Plan Dimensions vs. Number of Rooms")

# Show the plot
fig.show()


KeyError: ['height', 'width']

The given observations between number of rooms and dimensions show no clear relation between the two metrices. So we can conclude that a variety of floor plans can be made for a partical dimension floor plan.


In [12]:
# Ensure no division by zero for room_count
df = df[df["room_count"] > 0]

# Calculate additional metrics
df["floor_area"] = df["width"] * df["height"]  # Total floor area
df["room_density"] = df["floor_area"] / df["room_count"]  # Avg. room size
df["aspect_ratio"] = df["width"] / df["height"]  # Width-to-height ratio

# Display the updated DataFrame
df.head()


TypeError: '>' not supported between instances of 'tuple' and 'int'

In [None]:
def compute_floor_metrics(row):
    """Computes floor area, room density, and aspect ratio for a given row."""
    width = row["width"]
    height = row["height"]
    rooms = row["room_count"]

    # Calculate values
    floor_area = width * height  # Total area
    room_density = floor_area / rooms if rooms > 0 else 0  # Avoid division by zero
    aspect_ratio = width / height if height > 0 else 0  # Avoid division by zero

    return floor_area, room_density, aspect_ratio

# Apply function and store values in new columns
df[["floor_area", "room_density", "aspect_ratio"]] = df.apply(compute_floor_metrics, axis=1, result_type="expand")

# Check the updated DataFrame
df.head()


Unnamed: 0,filename,door,window,zone,exists,room_count,height,width,floor_area,room_density,aspect_ratio
381,1037_png.rf.003d4171dfaffd11622e9093c7122e23.jpg,1,1,1,1,0,416,416,173056.0,0.0,1.0
710,802_png.rf.6cbdd68ae183f2d2cf9923c10931b20f.jpg,1,1,1,1,0,416,403,167648.0,0.0,0.96875
225,1037_png.rf.aba52ecdd4235c917f216d85ebd4dfa5.jpg,1,1,1,1,0,416,412,171392.0,0.0,0.990385
165,802_png.rf.99b1c3f43b08e430752f979ad7047898.jpg,1,1,1,1,0,416,416,173056.0,0.0,1.0
818,739_png.rf.8a32a29c6ad2cdd7bc42cf6cc3678467.jpg,1,1,1,1,1,82,70,5740.0,5740.0,0.853659
