In [1]:
import os

import numpy as np
import openai
import pandas as pd
from langchain.chains.summarize import load_summarize_chain
from langchain.embeddings.openai import OpenAIEmbeddings
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
#pip install tiktoken

In [3]:
# Directory holding the cleaned CSV exports. Set the POSTS_DATA_DIR environment
# variable to run the notebook on another machine; when it is unset, the
# original author's local folder is used.
DATA_DIR = os.environ.get(
    "POSTS_DATA_DIR",
    "C:\\Users\\Dell\\OneDrive - Northeastern University\\courses\\big data and intl analytics\\DAMG7245-Summer2023\\final project\\dataset_converted\\3d_printing",
)

# Load the cleaned posts and comments from CSV into pandas DataFrames
cleaned_data = pd.read_csv(os.path.join(DATA_DIR, "posts_cleaned.csv"))
comments_df = pd.read_csv(os.path.join(DATA_DIR, "comments_cleaned.csv"))

# Load the pre-trained Sentence Transformers model
model = SentenceTransformer('distilbert-base-nli-stsb-mean-tokens')


In [4]:
#pip install openai

In [5]:
#pip install langchain

In [6]:
# Preprocessing function to convert tags from string to list
def process_tags(Tags):
    """Convert a comma-separated tag string into a list of tags.

    Args:
        Tags: Raw cell value: a comma-separated string, a missing value
            (NaN/None), or a list that was already converted.

    Returns:
        list: The individual tags with surrounding whitespace removed and
        empty entries dropped. Missing values give an empty list. A list
        input is returned unchanged, so applying the conversion a second
        time (e.g. when the cell is re-run) is harmless.
    """
    if isinstance(Tags, list):
        # Already converted; pd.isna() on a list would not return a single bool.
        return Tags
    if pd.isna(Tags):  # Handle NaN values
        return []
    # Strip padding and skip blanks so "a, b," yields ['a', 'b'].
    return [tag.strip() for tag in Tags.split(',') if tag.strip()]

# Convert the 'post_tags' column from comma-separated strings to lists of tags.
# NOTE: the column is overwritten in place with the converted values.
cleaned_data['post_tags'] = cleaned_data['post_tags'].apply(process_tags)

def generate_embeddings(text):
    """Encode ``text`` with the module-level ``model`` and return the result."""
    return model.encode(text)

def filter_data(user_input_tag):
    """Return the rows of ``cleaned_data`` whose tag list contains ``user_input_tag``.

    Rows are selected by tag membership only; no filtering on "posttypeid"
    happens here.
    """
    def has_requested_tag(tags):
        return user_input_tag in tags

    tag_mask = cleaned_data['post_tags'].apply(has_requested_tag)
    return cleaned_data[tag_mask]



In [7]:
def topic_relevance_search(user_input, user_input_tag, top_tags=10, top_n=5):
    """Find the posts most similar to a free-text query within one tag.

    Args:
        user_input: Query text to compare against the posts.
        user_input_tag: Tag a post must carry to be considered.
        top_tags: Unused; kept so existing callers keep working.
        top_n: Maximum number of posts to return (default 5).

    Returns:
        tuple: ``(top_similar_topics, top_similarity_scores)``. The first item
        is a list of dict records with the keys 'post_id', 'post_title',
        'posttypeid', 'post_body' and 'post_body_accepted'; the second holds
        the matching cosine similarity scores. Both are ordered from most to
        least similar. When no post carries the tag, a message is printed and
        an empty list plus an empty array are returned, so callers can always
        unpack two values.
    """
    # Restrict the search to posts carrying the requested tag
    filtered_data = filter_data(user_input_tag)

    # Nothing to rank: keep the two-value return shape
    if filtered_data.empty:
        print(f"No relevant data found for the tag '{user_input_tag}'.")
        return [], np.array([])

    # Concatenate title and body for embedding, skipping NaN values
    relevant_text_list = filtered_data.apply(
        lambda row: ' '.join(filter(pd.notna, [row['post_title'], row['post_body']])),
        axis=1,
    ).tolist()

    # Generate embeddings for user input and relevant data
    user_embedding = generate_embeddings([user_input])
    data_embeddings = generate_embeddings(relevant_text_list)

    # Cosine similarity of the single query row against every post
    similarity_scores = cosine_similarity(user_embedding, data_embeddings)[0]

    # Indices of the top_n highest scores, best first
    top_indices = np.argsort(similarity_scores)[::-1][:top_n]

    result_columns = ['post_id', 'post_title', 'posttypeid', 'post_body', 'post_body_accepted']
    top_similar_topics = filtered_data.iloc[top_indices][result_columns].to_dict('records')
    top_similarity_scores = similarity_scores[top_indices]

    return top_similar_topics, top_similarity_scores



In [8]:
# def generate_summary(data, openai_api_key):
#     if openai_api_key:
#         # Truncate data to a maximum of 2048 tokens
#         max_tokens = 2048
#         data_tokens = data.split()
#         if len(data_tokens) > max_tokens:
#             data = ' '.join(data_tokens[:max_tokens])

#         # Compute summaries using OpenAI
#         openai.api_key = openai_api_key
#         response = openai.Completion.create(
#             model="text-davinci-003",
#             prompt=f"   \n\n{data}",
#             temperature=0,
#             max_tokens=2500,
#             top_p=1.0,
#             frequency_penalty=0.0,
#             presence_penalty=0.0
#         )
#         summary = response['choices'][0]['text']
        
#         return summary



In [9]:
if __name__ == "__main__":
    # Example usage:
    user_question = "ring"
    user_tag = "<3d-design>"

    # The search may come back without a (topics, scores) pair when no post
    # carries the tag; fall back to two empty sequences so unpacking never fails.
    search_result = topic_relevance_search(user_question, user_tag)
    similar_topics, similarity_scores = search_result if len(search_result) == 2 else ([], [])

    print("Most similar topics:")

    # Topics and scores are parallel sequences, best match first
    for rank, (topic, score) in enumerate(zip(similar_topics, similarity_scores), start=1):
        print(f"{rank}. Post ID: {topic['post_id']} : {topic['post_title']} (similarity score: {score:.2f})")

Most similar topics:
1. Post ID: 4672 : Designing back plate for Ring Doorbell (similarity score: 0.21)
2. Post ID: 20570 : How would this OLED be mounted if in a commercial device? (similarity score: 0.08)
3. Post ID: 8172 : How do I decide what size my push-fit feature should be? (similarity score: 0.07)
4. Post ID: 4681 : Make a nose cone in Fusion 360 (similarity score: 0.02)
5. Post ID: 16106 : Source of design advice for modeling functional parts (similarity score: -0.00)


In [10]:
selected_topic_post_id = 4681

# Locate the chosen post among the search results. Fail with a clear message
# (rather than a bare StopIteration) when the id is not one of them.
selected_topic_index = next(
    (i for i, topic in enumerate(similar_topics) if topic['post_id'] == selected_topic_post_id),
    None,
)
if selected_topic_index is None:
    raise ValueError(f"Post ID {selected_topic_post_id} is not among the similar topics.")

selected_topic = similar_topics[selected_topic_index]

post_id = selected_topic['post_id']
post_title = selected_topic['post_title']
posttypeid = selected_topic['posttypeid']
post_body = selected_topic['post_body']
post_body_accepted = selected_topic['post_body_accepted']

print(f"\nSelected topic: {post_title}")

# Accepted answer section (empty when the post has none)
if pd.isna(post_body_accepted):
    print("No accepted answer")
    accepted_answer_text = ""
else:
    print(f"Accepted answer: {post_body_accepted}")
    accepted_answer_text = f"\n\nAccepted answer:\n{post_body_accepted}"

# Post body section, labelled according to the post type
if posttypeid == 1:
    print(f"Description: {post_body}")
    post_text = f"\n\nDescription:\n{post_body}"
elif posttypeid == 2:
    print(f"Answer: {post_body}")
    post_text = f"\n\nAnswer:\n{post_body}"
else:
    # Any other post type: still define post_text so later cells neither hit
    # a NameError nor silently reuse a value left over from an earlier run.
    post_text = ""

# Comments attached to the selected post
post_comments = comments_df[comments_df['post_id'] == post_id]

comment_lines = []
if not post_comments.empty:
    print("Comments:")
    for comment in post_comments['comments_text']:
        print(f"\t- {comment}")
        comment_lines.append(f"\n- {comment}")
comments_text = "".join(comment_lines)
    


Selected topic: Make a nose cone in Fusion 360
Accepted answer: <p>If you have a specific shape in mind and can create a sketch to represent that shape, you are halfway to your goal.</p>

<p>The concept is simple. Create a single line sketch that would represent the desired curve, starting from, in this example, the nose of the cone and traveling to the base. Create only one-half of the nose cone curve and maintain a "standard" axis reference, say, using the Y-axis as the rotation point.</p>

<p>The process is called <a href="http://help.autodesk.com/view/fusion360/ENU/?guid=GUID-5C62F370-7AB4-4DFD-BE61-F8830F30A6D7" rel="noreferrer">revolve. Fusion 360</a> supports this action directly.</p>

<pre><code>In the Sculpt workspace, choose Create Revolve.
Select the profile to revolve.
In the Revolve dialog:
    Click Axis and then select the axis to revolve around.
    Choose Full or Angle to specify whether the revolution is full or to a specific angle.
    For Direction choose One Side,

In [22]:
from langchain.llms.openai import OpenAI
from langchain.chains.summarize import load_summarize_chain

In [24]:
class Document:
    """Minimal document container exposing ``page_content`` and ``metadata``.

    Args:
        page_content: The text of the document.
        metadata: Optional mapping with extra information about the document.
            When omitted, each instance gets its own new empty dict.
    """

    def __init__(self, page_content, metadata=None):
        self.page_content = page_content
        # Copy so instances never share (or mutate) the caller's dict.
        self.metadata = dict(metadata) if metadata is not None else {}

In [29]:
# Assemble the text to summarize: title, post body, accepted answer, comments
data = (
    f"{post_title}{post_text}{accepted_answer_text}"
    f"\n\nComments:{comments_text}"
)
data

'Make a nose cone in Fusion 360\n\nDescription:\n<p>I am new to Fusion 360 and I think I\'m going straight to something complicated. Is there a way to make a nose cone for a model rocket? What tools would one you to accomplish this?</p>\n\n\nAccepted answer:\n<p>If you have a specific shape in mind and can create a sketch to represent that shape, you are halfway to your goal.</p>\n\n<p>The concept is simple. Create a single line sketch that would represent the desired curve, starting from, in this example, the nose of the cone and traveling to the base. Create only one-half of the nose cone curve and maintain a "standard" axis reference, say, using the Y-axis as the rotation point.</p>\n\n<p>The process is called <a href="http://help.autodesk.com/view/fusion360/ENU/?guid=GUID-5C62F370-7AB4-4DFD-BE61-F8830F30A6D7" rel="noreferrer">revolve. Fusion 360</a> supports this action directly.</p>\n\n<pre><code>In the Sculpt workspace, choose Create Revolve.\nSelect the profile to revolve.\nIn t

In [26]:
# Wrap the assembled text in a Document for the summarize chain
doc = Document(data)

In [13]:
# pip install --upgrade langchain

In [27]:
# Initialize the OpenAI module and load the summarize chain.
# The API key is read from the environment so that no secret is stored in the
# notebook. Any key that was ever committed in source should be treated as
# compromised and revoked.
openai_api_key = os.environ.get("OPENAI_API_KEY")
if not openai_api_key:
    raise RuntimeError("Set the OPENAI_API_KEY environment variable before running this cell.")

llm = OpenAI(temperature=0, openai_api_key=openai_api_key)
chain = load_summarize_chain(llm, chain_type="stuff")

# Use the Document built from the string variable as input to the summarization chain.
# NOTE(review): the default "stuff" summarize prompt may not consume `question`;
# confirm this instruction actually reaches the model.
summary = chain.run(input_documents=[doc], question="Write a concise summary within 200 words.")

In [28]:
# Display the summary produced by the chain
summary

'\nUsing Fusion 360, one can create a nose cone for a model rocket by creating a single line sketch of the desired shape, then using the Revolve tool to create the cone. The sketch should start from the nose and travel to the base, and the base segment should be joined while the nose segments should be open and aligned to the Y-axis. Offsetting or hand sketching a parallel line that returns to the nose can add thickness. Tutorials and videos are available online for further guidance.'