In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
import joblib

# Function to generate linear data
def generate_linear_data(num_samples, slope_range=(-5, 5), intercept_range=(-10, 10), noise=0.5, rng=None):
    """Generate noisy samples lying on one randomly chosen straight line.

    A single slope and a single intercept are drawn uniformly from the given
    ranges, ``X`` is drawn uniformly between -100 and 100, and
    ``y = slope * X + intercept + Normal(0, noise)``.

    Parameters
    ----------
    num_samples : int
        Number of (X, y) points to generate.
    slope_range : tuple of (low, high)
        Bounds for the uniformly drawn slope.
    intercept_range : tuple of (low, high)
        Bounds for the uniformly drawn intercept.
    noise : float
        Standard deviation of the Gaussian noise added to ``y``.
    rng : None, int, or numpy.random.Generator, optional
        Source of randomness. ``None`` (the default) uses NumPy's global
        ``np.random`` state exactly as before; an int seed or a ``Generator``
        makes the output reproducible without touching global state.

    Returns
    -------
    X, y : numpy.ndarray
        Two 1-D arrays of length ``num_samples``.
    """
    # np.random (module) and Generator both expose uniform()/normal(), so the
    # drawing code below is identical for the legacy and the seeded path.
    source = np.random if rng is None else np.random.default_rng(rng)

    X = source.uniform(-100, 100, num_samples)
    slope = source.uniform(*slope_range)
    intercept = source.uniform(*intercept_range)
    y = slope * X + intercept + source.normal(0, noise, num_samples)
    return X, y

# Generate dataset
# Seed NumPy's global generator first: generate_linear_data draws the slope,
# intercept and noise from it, so without a seed the CSV, the fitted model and
# the reported scores would differ on every run.
random_seed = 42
np.random.seed(random_seed)

num_samples = 1000
slope_range = (-5, 5)
intercept_range = (-10, 10)
noise = 0.5
X, y = generate_linear_data(num_samples, slope_range, intercept_range, noise)

# Save dataset to CSV (two columns with an "X,y" header row)
np.savetxt('linear_dataset.csv', np.column_stack((X, y)), delimiter=',', header='X,y', comments='')

# Load dataset back from disk; from here on X and y are pandas objects
df = pd.read_csv('linear_dataset.csv')
X = df[['X']]  # Features (kept 2-D: double brackets select a DataFrame)
y = df['y']    # Target variable (Series)

# Print X and y coordinates
print("X coordinates:")
print(X.head())

print("\ny coordinates:")
print(y.head())

# Split dataset into training and testing sets (80 % / 20 %)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=random_seed)

# Initialize and train the linear regression model
model = LinearRegression()
model.fit(X_train, y_train)

# Evaluate the model: score() returns the R^2 coefficient of determination
train_score = model.score(X_train, y_train)
test_score = model.score(X_test, y_test)

print("\nTraining R^2 score:", train_score)
print("Testing R^2 score:", test_score)

# Save the trained model to a file so later cells can reload it
joblib.dump(model, 'linear_regression_model.pkl')

print("Model saved successfully.")



In [1]:
from sklearn.cluster import KMeans

# Cluster the (X, y) points of a freshly generated synthetic line.
# NOTE: this cell calls generate_linear_data, which is defined in the first
# cell; run that cell first or this one fails with NameError.
num_samples = 1000
slope_range = (-5, 5)
intercept_range = (-10, 10)
noise = 0.5
X, y = generate_linear_data(num_samples, slope_range, intercept_range, noise)

# Concatenate features and target variable into one (num_samples, 2) array
data = np.column_stack((X, y))

# Apply K-means clustering.
# random_state fixes the centroid initialisation so that, for the same data,
# the cluster labels are the same on every run.
kmeans = KMeans(n_clusters=3, random_state=42)  # Adjust number of clusters as needed
kmeans.fit(data)

# Get cluster labels (one integer label per row of `data`)
cluster_labels = kmeans.labels_

# Print cluster labels
print("Cluster labels:", cluster_labels)


NameError: name 'generate_linear_data' is not defined

In [19]:
import cv2
import numpy as np
from sklearn.linear_model import LinearRegression

# Load the trained linear regression model.
# joblib.load returns the fitted estimator itself, so creating an empty
# LinearRegression() first only produced an object that was thrown away.
# NOTE(review): joblib files are pickle-based -- only load files you trust.
model = joblib.load('linear_regression_model.pkl')

# Function to read a downloaded image and preprocess it
def preprocess_image(image_path):
    """Read an image file and return its grayscale pixels as a column vector.

    Parameters
    ----------
    image_path : str
        Path of the image file to read.

    Returns
    -------
    numpy.ndarray or None
        Array of shape (n_pixels, 1) with one grayscale value per row, or
        ``None`` when the file could not be read. Callers are expected to
        check for ``None`` before using the result.
    """
    # cv2.imread does not raise on a missing/corrupt file; it returns None.
    image = cv2.imread(image_path)
    if image is None:
        return None

    # Convert the image to grayscale
    grayscale_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    # Flatten to 1-D, then reshape so every pixel is a single-feature row
    return grayscale_image.flatten().reshape(-1, 1)

# Example usage
# Path of the downloaded image that is fed to the model
image_path = '/content/images_output/slide19 (1).jpg'
image = preprocess_image(image_path)

# Only call the model when preprocessing actually produced pixel data
if image is None:
    print("Failed to preprocess the image.")
else:
    # Apply the machine learning model to the preprocessed image
    prediction = model.predict(image)
    # Print the prediction
    print("Prediction:", prediction)


Prediction: [947.56494023 947.56494023 947.56494023 ...  -2.6334914    1.09277696
 250.75275703]




In [2]:
import requests
from bs4 import BeautifulSoup
import os

# Function to scrape images from a given website URL
def scrape_images(url, timeout=10):
    """Collect the image URLs referenced by ``<img>`` tags on a web page.

    Parameters
    ----------
    url : str
        Address of the HTML page to scan.
    timeout : float, optional
        Seconds to wait for the server before requests gives up.

    Returns
    -------
    list of str or None
        Absolute image URLs in document order, or ``None`` (after printing a
        message) when the page did not answer with HTTP 200.
    """
    # Local import: keeps this function usable without touching the cell's imports
    from urllib.parse import urljoin

    # Send a GET request to the URL
    response = requests.get(url, timeout=timeout)

    # Check if the request was successful
    if response.status_code != 200:
        print("Failed to fetch URL:", url)
        return None

    # Parse the HTML content
    soup = BeautifulSoup(response.content, 'html.parser')

    image_urls = []
    for tag in soup.find_all('img'):
        src = tag.get('src')
        if not src:
            # <img> without a usable src has nothing to download
            continue
        # Relative paths ("/a.png", "img/b.png", "//cdn/c.png") are resolved
        # against the page URL; absolute URLs pass through unchanged.
        image_urls.append(urljoin(url, src.strip()))

    return image_urls

# Function to download images
def download_images(image_urls, output_dir, timeout=10):
    """Download each URL into ``output_dir`` as ``image_<index>.jpg``.

    The index in the file name is the URL's position in ``image_urls``, so a
    skipped or failed entry leaves a gap instead of renumbering later files.
    A URL that cannot be fetched is reported and skipped; it no longer stops
    the remaining downloads.

    Parameters
    ----------
    image_urls : iterable of str
        Addresses to download.
    output_dir : str
        Target directory; created if it does not exist.
    timeout : float, optional
        Seconds to wait for each server response.
    """
    os.makedirs(output_dir, exist_ok=True)

    for i, url in enumerate(image_urls):
        # Only http(s) addresses can be fetched; inline "data:" URIs and
        # unresolved relative paths would make requests raise.
        if not url.lower().startswith(('http://', 'https://')):
            # Data URIs can be very long, so only show the beginning
            print("Skipping unsupported image URL:", url[:60])
            continue

        try:
            # Send a GET request to the image URL
            response = requests.get(url, timeout=timeout)
        except requests.RequestException:
            print("Failed to download image from URL:", url)
            continue

        # Check if the request was successful
        if response.status_code == 200:
            # Write the image content to a file
            with open(os.path.join(output_dir, f"image_{i}.jpg"), 'wb') as f:
                f.write(response.content)
        else:
            print("Failed to download image from URL:", url)


# Create the directory if it does not exist
output_directory = 'images_output'
os.makedirs(output_directory, exist_ok=True)  # no-op when the folder is already there

# Example usage
# The URL literal previously ended with a stray space after ".png", which was
# passed to requests as part of the address; it has been removed.
# NOTE(review): this address ends in ".png", i.e. it looks like a direct image
# link rather than an HTML page, while scrape_images() searches HTML for <img>
# tags -- confirm this is the page you meant to scrape.
url = 'https://d138zd1ktt9iqe.cloudfront.net/media/seo_landing_files/revati-f-linear-graph-01-1605708494.png'
image_urls = scrape_images(url)
if image_urls:
    # Reuse the variable so the created folder and the download target cannot drift apart
    download_images(image_urls, output_directory)


Failed to fetch URL: https://d138zd1ktt9iqe.cloudfront.net/media/seo_landing_files/revati-f-linear-graph-01-1605708494.png 


In [None]:
import os
import cv2
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.decomposition import PCA
import joblib

# Restore the fitted regression model from the file written by joblib.dump
model = joblib.load('linear_regression_model.pkl')

# Target size, in pixels, that every image is resized to before flattening
desired_width, desired_height = 100, 100

# Function to load and preprocess images from a directory
def load_images_from_directory(directory):
    """Load and preprocess every ``.jpg`` / ``.png`` file in ``directory``.

    Parameters
    ----------
    directory : str
        Folder to scan (not recursive).

    Returns
    -------
    list
        One preprocessed image (the result of ``preprocess_image``) per
        readable file, ordered by file name. Files that OpenCV cannot decode
        are reported and skipped.
    """
    images = []
    # os.listdir gives no ordering guarantee; sorting makes the result
    # (and everything computed from it) repeatable.
    for filename in sorted(os.listdir(directory)):
        if not filename.endswith((".jpg", ".png")):
            continue
        img = cv2.imread(os.path.join(directory, filename))
        if img is None:
            # cv2.imread returns None instead of raising for unreadable files
            print(f"Skipping unreadable image: {filename}")
            continue
        # Preprocess the image
        images.append(preprocess_image(img))
    return images

# Function to preprocess the image
def preprocess_image(img):
    """Resize ``img`` to (desired_width, desired_height) and return it flattened to 1-D."""
    target_size = (desired_width, desired_height)
    return cv2.resize(img, target_size).flatten()

# Function to apply PCA to images
def apply_pca_to_images(images, n_components=1):
    """Fit a PCA with ``n_components`` components on ``images`` and return the projected data."""
    return PCA(n_components=n_components).fit_transform(images)

# Function to apply the machine learning model to images
def apply_model_to_images(images):
    """Run the module-level ``model`` on each image and return the list of predictions.

    A 1-D sample is turned into a single-row 2-D array before prediction;
    samples with any other number of dimensions are passed through as they are.
    """
    def _as_batch(sample):
        # Promote a flat feature vector to shape (1, n_features)
        return sample.reshape(1, -1) if sample.ndim == 1 else sample

    return [model.predict(_as_batch(sample)) for sample in images]


# Function to print the output XY coordinates
def print_output_coordinates(predictions):
    """Print one "X / y" line per prediction.

    Parameters
    ----------
    predictions : sequence
        Each item is an indexable sequence. The first element is printed as
        X and the second as y; missing elements are printed as ``N/A`` so an
        empty prediction no longer raises IndexError.
    """
    for i, prediction in enumerate(predictions):
        available = len(prediction)
        if available >= 2:
            print(f"Image {i+1} - X: {prediction[0]}, y: {prediction[1]}")
        elif available == 1:
            print(f"Image {i+1} - X: {prediction[0]}, y: N/A (insufficient data)")
        else:
            # Nothing to index at all
            print(f"Image {i+1} - X: N/A, y: N/A (insufficient data)")


# Example usage
images_directory = 'images_output'
images = load_images_from_directory(images_directory)

if images:
    # Apply PCA to images
    images_pca = apply_pca_to_images(images)

    # Apply the machine learning model to PCA-transformed images
    predictions = apply_model_to_images(images_pca)
else:
    # PCA cannot be fitted on zero samples, so stop with a clear message
    # instead of letting fit_transform fail on an empty list.
    print(f"No .jpg/.png images found in '{images_directory}'.")
    images_pca = []
    predictions = []

# Print the output XY coordinates
print_output_coordinates(predictions)


In [3]:
import requests
from bs4 import BeautifulSoup
import cv2
import numpy as np
import os
import pandas as pd
import matplotlib.pyplot as plt

# Function to create directory if it doesn't exist
def create_directory(directory_name):
    """Make sure ``directory_name`` exists and print whether it had to be created."""
    if os.path.exists(directory_name):
        print(f"Directory '{directory_name}' already exists.")
        return

    os.makedirs(directory_name)
    print(f"Directory '{directory_name}' created successfully.")

# Function to download images from URLs
def download_images(image_urls, output_dir, timeout=10):
    """Download each URL into ``output_dir`` as ``graph_<index>.jpg``.

    The index is the URL's position in ``image_urls``; skipped or failed
    entries leave a gap in the numbering. Inline ``data:`` URIs are skipped,
    and a URL that cannot be fetched or does not answer with HTTP 200 is
    reported instead of being saved as a broken image file.

    Parameters
    ----------
    image_urls : iterable of str
        Addresses to download.
    output_dir : str
        Target directory; created if it does not exist.
    timeout : float, optional
        Seconds to wait for each server response.
    """
    os.makedirs(output_dir, exist_ok=True)

    for i, url in enumerate(image_urls):
        if url.startswith('data:'):
            continue  # Skip inline data URIs (e.g. SVG placeholders); there is nothing to fetch

        try:
            response = requests.get(url, timeout=timeout)
        except requests.RequestException:
            # Covers network errors as well as malformed / schemeless URLs
            print("Failed to download image from URL:", url)
            continue

        if response.status_code != 200:
            # Do not write an error page to disk under a .jpg name
            print("Failed to download image from URL:", url)
            continue

        with open(f'{output_dir}/graph_{i}.jpg', 'wb') as f:
            f.write(response.content)

# Function to detect graphs in images
def detect_graphs(image_files, output_dir='detected_images'):
    """Outline the external contours found in each image and save the result.

    For every input file the image is converted to grayscale, Canny edge
    detection (thresholds 50 / 150) is applied, external contours are
    extracted and a green bounding rectangle is drawn around each one. The
    annotated image is written to ``output_dir`` under the input's base name.

    Parameters
    ----------
    image_files : iterable of str
        Paths of the images to process.
    output_dir : str, optional
        Folder for the annotated copies; created if it does not exist.

    Returns
    -------
    list of str
        Paths of the annotated images that were written successfully.
        Inputs that cannot be read, or outputs that cannot be written, are
        reported and left out.
    """
    # Without this the write below fails when the folder is missing
    os.makedirs(output_dir, exist_ok=True)

    detected_graphs = []
    for image_file in image_files:
        image = cv2.imread(image_file)
        if image is None:
            # cv2.imread returns None instead of raising for unreadable files
            print(f"Could not read image: {image_file}")
            continue

        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        edges = cv2.Canny(gray, 50, 150)

        # Find contours. findContours returns (contours, hierarchy) in
        # OpenCV 4 but (image, contours, hierarchy) in OpenCV 3; indexing
        # from the end picks the contour list in both cases.
        contours = cv2.findContours(edges, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]

        # Draw a bounding box for every contour on the original image
        for contour in contours:
            x, y, w, h = cv2.boundingRect(contour)
            cv2.rectangle(image, (x, y), (x + w, y + h), (0, 255, 0), 2)

        # Save the annotated image
        output_file = f'{output_dir}/{os.path.basename(image_file)}'
        if not cv2.imwrite(output_file, image):
            print(f"Could not write image: {output_file}")
            continue

        # Only report files that actually exist on disk
        detected_graphs.append(output_file)

    return detected_graphs

# Function to annotate detected graphs
def annotate_graphs(graph_images):
    """Return placeholder (x, y) annotations for the given graph images.

    This is a dummy implementation: the images themselves are not inspected.
    For each entry a random number of points (5 to 9) is produced, each
    coordinate drawn uniformly from 0..10 and rounded to one decimal, and
    all points are collected into one flat list.
    """
    points = []
    for _ in graph_images:
        point_count = np.random.randint(5, 10)
        for _ in range(point_count):
            # Draw x before y so the random sequence is consumed in a fixed order
            x_val = round(np.random.uniform(0, 10), 1)
            y_val = round(np.random.uniform(0, 10), 1)
            points.append((x_val, y_val))
    return points

# Function to save annotation outputs
def save_annotations(annotation_data, output_file):
    """Write annotation pairs to ``output_file`` as a CSV with columns X and Y.

    Floats are formatted with ``'%.1f'`` and the row index is not written.
    """
    column_names = ['X', 'Y']
    pd.DataFrame(annotation_data, columns=column_names).to_csv(
        output_file,
        float_format='%.1f',
        index=False,
    )

# Web scraping
from urllib.parse import urljoin  # stdlib; resolves relative <img src> values against the page URL

url = 'https://www.frontporchmath.com/topics/algebra/graphing-linear-equations/graphing-linear-equations-creating-table-coordinates-video/graphing-linear-equations-2/'  # Replace with the URL of the web page containing graph images
response = requests.get(url, timeout=10)
response.raise_for_status()  # stop on an HTTP error instead of parsing an error page
soup = BeautifulSoup(response.text, 'html.parser')
# img['src'] raises KeyError for <img> tags without a src attribute, so those
# are filtered out; relative paths are made absolute so they can be requested.
image_urls = [urljoin(url, img['src']) for img in soup.find_all('img') if img.get('src')]

# Create directories for downloaded and annotated images.
# detect_graphs writes into 'detected_images', so it must exist before that call.
create_directory('downloaded_images')
create_directory('detected_images')

# Download images
download_images(image_urls, 'downloaded_images')

# Detect graphs in downloaded images
detected_graphs = detect_graphs(['downloaded_images/graph_0.jpg'])  # Example with a single image

# Annotate detected graphs
annotated_data = annotate_graphs(detected_graphs)

# Save annotation outputs
save_annotations(annotated_data, 'annotation_coordinates.csv')  # Example annotation data



Directory 'downloaded_images' already exists.
