In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
import joblib
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.optimizers import Adam

# Function to generate random annotation points within a specified range
def generate_random_annotation_points(num_sets, num_points_per_set_min, num_points_per_set_max, x_range=(0, 10), y_range=(0, 20)):
    """Build several sets of random (x, y) annotation points.

    Draws from NumPy's global random state (``np.random``), so results are
    reproducible only if the caller seeds it beforehand.

    Args:
        num_sets: Number of point sets to create.
        num_points_per_set_min: Smallest number of points a set may hold.
        num_points_per_set_max: Largest number of points a set may hold
            (inclusive).
        x_range: ``(low, high)`` bounds passed to ``np.random.uniform`` for x.
        y_range: ``(low, high)`` bounds passed to ``np.random.uniform`` for y.

    Returns:
        A list with ``num_sets`` entries; each entry is a list of ``(x, y)``
        tuples.
    """
    generated_sets = []
    for _set_index in range(num_sets):
        # randint excludes its upper bound, hence the "+ 1" to make max inclusive.
        point_count = np.random.randint(num_points_per_set_min, num_points_per_set_max + 1)

        points = []
        for _point_index in range(point_count):
            # Draw x before y so the random stream is consumed in a fixed order.
            x_value = np.random.uniform(x_range[0], x_range[1])
            y_value = np.random.uniform(y_range[0], y_range[1])
            points.append((x_value, y_value))

        generated_sets.append(points)
    return generated_sets

# Noise-free linear dataset: y = 2x + 3 sampled on evenly spaced x values.
# The seed is set here so the random annotation points drawn below are reproducible.
np.random.seed(42)
num_samples = 1000
x = np.linspace(start=0, stop=10, num=num_samples)
y = 3 + 2 * x

# Fit a linear regression on the full dataset.
# The estimator needs a 2-D feature matrix, so x becomes a single column.
x_column = x[:, np.newaxis]
model = LinearRegression().fit(x_column, y)

# Draw the random annotation sets; each set holds between
# num_points_per_set_min and num_points_per_set_max points (inclusive).
num_sets = 10
num_points_per_set_min = 2
num_points_per_set_max = 5
annotation_points_sets = generate_random_annotation_points(
    num_sets=num_sets,
    num_points_per_set_min=num_points_per_set_min,
    num_points_per_set_max=num_points_per_set_max,
)

# Flat list of every annotation point across all sets
all_annotation_points = []

# Per-set annotation coordinates and the matching explanation text
annotations = []
explanation = []
for i, annotation_points in enumerate(annotation_points_sets):
    # Only the random x coordinates are used; the random y is discarded and
    # replaced by the model's prediction so each point sits on the fitted line.
    x_values = np.array(annotation_points)[:, 0].reshape(-1, 1)
    predicted_y = model.predict(x_values)

    # Round to 1 decimal place and convert to built-in floats.
    # round() on an np.float64 returns an np.float64, whose repr under
    # NumPy >= 2 is "np.float64(18.9)"; without the conversion that text
    # would leak into the printed lists and the CSV "Annotations" column.
    updated_annotation_points = [
        (float(round(x_anno, 1)), float(round(y_anno, 1)))
        for (x_anno, _), y_anno in zip(annotation_points, predicted_y)
    ]

    # Append updated annotation points to the flat list
    all_annotation_points.extend(updated_annotation_points)

    # Print updated annotation coordinates rounded to 1 decimal place
    print(f"Updated Annotation coordinates for Set {i+1}:")
    print(updated_annotation_points)

    # Keep the per-set coordinates for plotting and CSV export
    annotations.append(updated_annotation_points)

    # Explanation text for this set; the coordinates themselves are stored
    # separately in `annotations`, not appended to this string.
    explanation.append(f"Explanation for Annotation Set {i+1}: This is a straight line graph with xy axis. The available coordinates are:")

# Set the directory for the images
image_directory = "graph_images"

# Create the directory if it doesn't exist
os.makedirs(image_directory, exist_ok=True)

# Save one image per annotation set: the line plus its annotated points
for i, annotation_points in enumerate(annotations):
    plt.figure(figsize=(8, 6))
    plt.plot(x, y, label='Linear Data')

    # Draw all points of the set with a single scatter call so the legend
    # gets exactly one 'Annotation Point' entry instead of one per point.
    if annotation_points:
        xs_anno, ys_anno = zip(*annotation_points)
        plt.scatter(xs_anno, ys_anno, color='red', label='Annotation Point', zorder=5)

    # Label every point with its coordinates
    for (x_anno, y_anno) in annotation_points:
        plt.text(x_anno, y_anno, f'({x_anno:.1f}, {y_anno:.1f})', fontsize=8, ha='right')

    plt.xlabel('X')
    plt.ylabel('Y')
    plt.title(f'Linear Data with Annotations (Set {i+1})')
    plt.legend()
    plt.grid(True)
    plt.savefig(os.path.join(image_directory, f'graph_with_annotations_set_{i+1}.png'))
    # Close the figure so figures do not accumulate across iterations
    plt.close()

# Write one CSV row per annotation set: image path, coordinates, explanation.
# The paths are rebuilt with the same naming pattern used when saving the images.
image_paths = [
    os.path.join(image_directory, f'graph_with_annotations_set_{i+1}.png')
    for i in range(num_sets)
]
csv_columns = {
    'Image_Path': image_paths,
    'Annotations': annotations,
    'Explanation': explanation,
}
annotation_df = pd.DataFrame(csv_columns)

# The CSV is stored alongside the images.
annotation_csv_path = os.path.join(image_directory, 'annotation_data.csv')
annotation_df.to_csv(annotation_csv_path, index=False)

# Hold out 20% of the samples for testing (fixed random_state for a repeatable split)
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

# The estimator expects 2-D feature matrices; reshape once and reuse
X_train_2d = X_train.reshape(-1, 1)
X_test_2d = X_test.reshape(-1, 1)

# Fit a linear regression on the training split
lr_model = LinearRegression().fit(X_train_2d, y_train)

# R^2 on both splits
train_score = lr_model.score(X_train_2d, y_train)
test_score = lr_model.score(X_test_2d, y_test)

print("Linear Regression Model Evaluation:")
print(f"Training R^2 Score: {train_score:.4f}")
print(f"Testing R^2 Score: {test_score:.4f}")

# Persist the fitted model next to the generated images
model_path = os.path.join(image_directory, 'linear_regression_model.pkl')
joblib.dump(lr_model, model_path)

print("Dataset created and saved successfully.")
print("Linear Regression Model trained and saved successfully.")


Updated Annotation coordinates for Set 1:
[(8.0, 18.9), (7.8, 18.6), (4.5, 11.9), (4.6, 12.2)]
Updated Annotation coordinates for Set 2:
[(7.1, 17.2), (9.7, 22.4), (2.1, 7.2), (1.8, 6.7), (5.2, 13.5)]
Updated Annotation coordinates for Set 3:
[(5.2, 13.5), (0.5, 3.9)]
Updated Annotation coordinates for Set 4:
[(4.6, 12.1), (2.0, 7.0), (5.9, 14.8), (6.1, 15.2)]
Updated Annotation coordinates for Set 5:
[(0.1, 3.3), (5.6, 14.3), (0.2, 3.3), (2.4, 7.8)]
Updated Annotation coordinates for Set 6:
[(5.0, 12.9), (9.1, 21.2), (6.6, 16.3), (5.2, 13.4), (1.8, 6.7)]
Updated Annotation coordinates for Set 7:
[(4.5, 12.0), (9.3, 21.5), (3.3, 9.5)]
Updated Annotation coordinates for Set 8:
[(0.5, 3.9), (3.9, 10.8), (8.3, 19.6), (2.8, 8.6), (1.4, 5.8)]
Updated Annotation coordinates for Set 9:
[(0.2, 3.3), (3.9, 10.9)]
Updated Annotation coordinates for Set 10:
[(8.2, 19.3), (7.3, 17.6), (0.7, 4.5), (1.2, 5.3)]
Linear Regression Model Evaluation:
Training R^2 Score: 1.0000
Testing R^2 Score: 1.0000
D