In [33]:
import os
from pathlib import Path

import numpy as np
import openai
import pandas as pd
from langchain.chains.summarize import load_summarize_chain
from langchain.embeddings.openai import OpenAIEmbeddings
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity


In [34]:
#pip install tiktoken

In [35]:
# Directory that holds posts_cleaned.csv and comments_cleaned.csv.
# Set the DATA_DIR environment variable to point somewhere else; the fallback
# is the original author's local directory, so existing setups keep working.
DATA_DIR = Path(os.environ.get(
    "DATA_DIR",
    "C:\\Users\\Dell\\OneDrive - Northeastern University\\courses\\big data and intl analytics\\DAMG7245-Summer2023\\final project\\dataset_converted\\3d_printing",
))

# Load the cleaned posts and comments from CSV into pandas DataFrames
cleaned_data = pd.read_csv(DATA_DIR / "posts_cleaned.csv")
comments_df = pd.read_csv(DATA_DIR / "comments_cleaned.csv")

# Load the pre-trained Sentence Transformers model
model = SentenceTransformer('distilbert-base-nli-stsb-mean-tokens')


In [36]:
#pip install openai

In [37]:
#pip install langchain

In [38]:
# Preprocessing function to convert tags from string to list
def process_tags(Tags):
    """Convert a raw comma-separated tag string into a list of tags.

    Args:
        Tags: A comma-separated string of tags, a missing value (NaN/None),
            or an already-parsed list/tuple of tags.

    Returns:
        list: The individual tags with surrounding whitespace removed.
        Missing values yield an empty list. An already-parsed list/tuple is
        returned as a new list, so re-running the preprocessing cell on a
        column that was already converted does not fail.
    """
    # Already parsed (e.g. the cell was executed twice): pd.isna() on a list
    # returns an array, which cannot be used as an `if` condition.
    if isinstance(Tags, (list, tuple)):
        return list(Tags)
    if pd.isna(Tags):  # Handle NaN values
        return []
    # Strip so "a, b" and "a,b" produce the same tags for membership tests.
    return [tag.strip() for tag in Tags.split(',')]

# Replace the raw 'post_tags' values with the lists produced by process_tags
cleaned_data['post_tags'] = cleaned_data['post_tags'].map(process_tags)

def generate_embeddings(text):
    """Encode ``text`` with the module-level ``model``.

    Args:
        text: Input forwarded unchanged to ``model.encode``.

    Returns:
        Whatever ``model.encode`` returns for ``text``.
    """
    return model.encode(text)

def filter_data(user_input_tag):
    """Select the rows of ``cleaned_data`` whose 'post_tags' contain a tag.

    Note: no filtering on 'posttypeid' happens here; every row carrying the
    tag is kept regardless of its post type.

    Args:
        user_input_tag: Tag to look for in each row's 'post_tags' value.

    Returns:
        The subset of ``cleaned_data`` where ``user_input_tag in post_tags``.
    """
    def has_tag(tags):
        return user_input_tag in tags

    tag_mask = cleaned_data['post_tags'].apply(has_tag)
    return cleaned_data[tag_mask]



In [39]:
def topic_relevance_search(user_input, user_input_tag, top_tags=10, top_n=5):
    """Rank the posts carrying a tag by cosine similarity to a text query.

    Args:
        user_input: Query text that each post is compared against.
        user_input_tag: Tag passed to ``filter_data`` to select candidate posts.
        top_tags: Unused; kept so existing callers keep working.
        top_n: Maximum number of posts to return (default 5).

    Returns:
        tuple: ``(top_similar_topics, top_similarity_scores)``.
        ``top_similar_topics`` is a list of dicts with the keys 'post_id',
        'post_title', 'posttypeid', 'post_body' and 'post_body_accepted';
        ``top_similarity_scores`` is the matching array of cosine
        similarities, highest first. When no post carries the tag, a message
        is printed and an empty list and empty array are returned, so callers
        can always unpack two values.
    """
    filtered_data = filter_data(user_input_tag)

    # Nothing to rank: still return a pair so `topics, scores = ...` works.
    if filtered_data.empty:
        print(f"No relevant data found for the tag '{user_input_tag}'.")
        return [], np.array([])

    # One text per post: title and body joined by a space, skipping NaN parts.
    relevant_text_list = [
        ' '.join(part for part in (title, body) if pd.notna(part))
        for title, body in zip(filtered_data['post_title'], filtered_data['post_body'])
    ]

    # Generate embeddings for user input and relevant data
    user_embedding = generate_embeddings([user_input])
    data_embeddings = generate_embeddings(relevant_text_list)

    # Row 0 holds the similarities of the single query against every post.
    similarity_scores = cosine_similarity(user_embedding, data_embeddings)[0]

    # Positions of the highest scores, best first.
    top_indices = np.argsort(similarity_scores)[::-1][:top_n]

    result_columns = ['post_id', 'post_title', 'posttypeid', 'post_body', 'post_body_accepted']
    top_similar_topics = filtered_data.iloc[top_indices][result_columns].to_dict('records')
    top_similarity_scores = similarity_scores[top_indices]

    return top_similar_topics, top_similarity_scores



In [40]:
# def generate_summary(data, openai_api_key):
#     if openai_api_key:
#         # Truncate data to a maximum of 2048 tokens
#         max_tokens = 2048
#         data_tokens = data.split()
#         if len(data_tokens) > max_tokens:
#             data = ' '.join(data_tokens[:max_tokens])

#         # Compute summaries using OpenAI
#         openai.api_key = openai_api_key
#         response = openai.Completion.create(
#             model="text-davinci-003",
#             prompt=f"   \n\n{data}",
#             temperature=0,
#             max_tokens=2500,
#             top_p=1.0,
#             frequency_penalty=0.0,
#             presence_penalty=0.0
#         )
#         summary = response['choices'][0]['text']
        
#         return summary



In [41]:
if __name__ == "__main__":
    # Example usage:
    user_question = "ring"
    user_tag = "<3d-design>"

    # Guard against a result that is not a (topics, scores) pair, e.g. a bare
    # empty list when no post matches the tag, which cannot be unpacked.
    search_result = topic_relevance_search(user_question, user_tag)
    if len(search_result) == 2:
        similar_topics, similarity_scores = search_result
    else:
        similar_topics, similarity_scores = [], []

    print("Most similar topics:")

    # Topics and scores are aligned by position; rank starts at 1 for display.
    for rank, (topic, score) in enumerate(zip(similar_topics, similarity_scores), start=1):
        print(f"{rank}. Post ID: {topic['post_id']} : {topic['post_title']} (similarity score: {score:.2f})")

Most similar topics:
1. Post ID: 4672 : Designing back plate for Ring Doorbell (similarity score: 0.21)
2. Post ID: 20570 : How would this OLED be mounted if in a commercial device? (similarity score: 0.08)
3. Post ID: 8172 : How do I decide what size my push-fit feature should be? (similarity score: 0.07)
4. Post ID: 4681 : Make a nose cone in Fusion 360 (similarity score: 0.02)
5. Post ID: 16106 : Source of design advice for modeling functional parts (similarity score: -0.00)


In [90]:
selected_topic_post_id = 4672

# Find the chosen post among the search results. A default of None lets us
# raise a readable error instead of a bare StopIteration when it is absent.
selected_topic_index = next(
    (i for i, topic in enumerate(similar_topics) if topic['post_id'] == selected_topic_post_id),
    None,
)
if selected_topic_index is None:
    raise ValueError(f"Post ID {selected_topic_post_id} is not among the similar topics.")

selected_topic = similar_topics[selected_topic_index]

post_id = selected_topic['post_id']
post_title = selected_topic['post_title']
posttypeid = selected_topic['posttypeid']
post_body = selected_topic['post_body']
post_body_accepted = selected_topic['post_body_accepted']

print(f"\nSelected topic: {post_title}")

if pd.isna(post_body_accepted):
    print("No accepted answer")
    accepted_answer_text = ""
else:
    print(f"Accepted answer: {post_body_accepted}")
    accepted_answer_text = f"\n\nAccepted answer:\n{post_body_accepted}"

# Reset first so an unexpected posttypeid neither leaves post_text undefined
# nor silently reuses the value from a previous run of this cell.
post_text = ""
if posttypeid == 1:
    print(f"Description: {post_body}")
    post_text = f"\n\nDescription:\n{post_body}"
elif posttypeid == 2:
    print(f"Answer: {post_body}")
    post_text = f"\n\nAnswer:\n{post_body}"

# Comments whose post_id matches the selected post
post_comments = comments_df[comments_df['post_id'] == post_id]

comments_text = ""
if not post_comments.empty:
    print("Comments:")
    for comment in post_comments['comments_text']:
        print(f"\t- {comment}")
        comments_text += f"\n- {comment}"
    


Selected topic: Designing back plate for Ring Doorbell
No accepted answer
Description: <p>What is a good tool to use to design a back plate for a Ring Doorbell?  The shape will essentially be a wedge shape, tilting the doorbell to the side and down a bit.  I'll need to iterate over a few prints, so want to be able to tweak it as I go.</p>

<p>I've found TinkerCAD doesn't let me adjust shape sizes, stretching one axis but not the other.</p>

Comments:
	- StackExchange has a policy against asking questions that are "primarily opinion-based" (explained in the [help center](https://3dprinting.stackexchange.com/help/dont-ask)) because they often don't result in very good answers (since everybody will just post a one-liner with their personal favorite CAD tool). Also, **don't answer questions in the comments**, please.


In [91]:
from langchain.llms.openai import OpenAI
from langchain.chains.summarize import load_summarize_chain

In [92]:
class Document:
    """Minimal document container exposing ``page_content`` and ``metadata``.

    Args:
        page_content: The content of the document.
        metadata: Optional mapping of extra information about the document.
            It is copied into a new dict; when omitted, ``metadata`` is an
            empty dict.
    """

    def __init__(self, page_content, metadata=None):
        self.page_content = page_content
        # Always build a fresh dict so instances never share metadata state.
        self.metadata = dict(metadata) if metadata is not None else {}

In [93]:
# Assemble the text to summarise: title, post text, accepted-answer text
# (empty when there is none), then a "Comments:" section.
data = f"{post_title}{post_text}{accepted_answer_text}\n\nComments:{comments_text}"
# Bare expression so the notebook displays the assembled string.
data

'Designing back plate for Ring Doorbell\n\nDescription:\n<p>What is a good tool to use to design a back plate for a Ring Doorbell?  The shape will essentially be a wedge shape, tilting the doorbell to the side and down a bit.  I\'ll need to iterate over a few prints, so want to be able to tweak it as I go.</p>\n\n<p>I\'ve found TinkerCAD doesn\'t let me adjust shape sizes, stretching one axis but not the other.</p>\n\n\nComments:\n- StackExchange has a policy against asking questions that are "primarily opinion-based" (explained in the [help center](https://3dprinting.stackexchange.com/help/dont-ask)) because they often don\'t result in very good answers (since everybody will just post a one-liner with their personal favorite CAD tool). Also, **don\'t answer questions in the comments**, please.'

In [94]:
# Wrap the assembled data string in a Document (page_content=data) so it can
# be handed to the summarization chain as an input document.
doc = Document(page_content=data)

In [95]:
# pip install --upgrade langchain

In [97]:
# Read the OpenAI key from the environment; never embed it in the notebook.
# The key that was previously hard-coded in this cell should be treated as
# compromised and revoked.
openai_api_key = os.environ.get("OPENAI_API_KEY", "")
if not openai_api_key:
    raise RuntimeError("Set the OPENAI_API_KEY environment variable before running this cell.")

# Initialize the OpenAI module and load the summarize chain
llm = OpenAI(temperature=0, openai_api_key=openai_api_key)
chain = load_summarize_chain(llm, chain_type="stuff")

# Use the Document built from the data string as input to the summarization chain.
# NOTE(review): the default "stuff" summarize prompt presumably only consumes
# the document text, so `question` may have no effect - confirm against the
# installed langchain version.
summary = chain.run(input_documents=[doc], question="Write a concise summary within 200 words.")

In [98]:
# Bare expression so the notebook displays the generated summary.
summary

' This post is asking for a good tool to design a back plate for a Ring Doorbell. The shape will be a wedge shape, and the user needs to be able to iterate over a few prints. TinkerCAD was mentioned, but it does not allow for shape size adjustments. StackExchange has a policy against opinion-based questions, and users are asked not to answer questions in the comments.'

### Feature 3

In [99]:
import openai


In [100]:
# Read the OpenAI key from the environment; an empty literal here makes every
# API call below fail, and a real literal would leak the secret.
openai_api_key = os.environ.get("OPENAI_API_KEY", "")
if not openai_api_key:
    raise RuntimeError("Set the OPENAI_API_KEY environment variable before running this cell.")
# User input
user_input = "how to design a back plate for a Ring Doorbell?"
# Concatenate the data: post_title, post_text, accepted_answer_text, and comments_text
data = f"{post_title}{post_text}{accepted_answer_text}\n\nComments:{comments_text}"
openai.api_key = openai_api_key

In [101]:
def get_openai_response(user_input, data, model="gpt-3.5-turbo", temperature=0.2, max_tokens=500):
    """Ask an OpenAI chat model to answer ``user_input`` using ``data``.

    Three messages are sent, in this order: a system instruction, the user
    input, and a second system message carrying ``data``.

    Args:
        user_input: The question to answer.
        data: Context text, sent as ``"Data: {data}"``.
        model: Chat model name (default "gpt-3.5-turbo").
        temperature: Sampling temperature (default 0.2).
        max_tokens: Upper bound on generated tokens (default 500).

    Returns:
        str: Content of the first choice's message, stripped of leading and
        trailing whitespace.
    """
    messages = [
        {"role": "system", "content": "Give the answer to the user input using the data. If the answer doesn't exist in data, create your own answer."},
        {"role": "user", "content": user_input},
        {"role": "system", "content": f"Data: {data}"},
    ]

    # Use the OpenAI API to generate a response
    response = openai.ChatCompletion.create(
        model=model,
        messages=messages,
        temperature=temperature,
        max_tokens=max_tokens,
    )

    # Extract and return the model-generated answer from the response
    return response["choices"][0]["message"]["content"].strip()

In [102]:
# Get the model's response based on the user input and the given data
# (user_input and data are the module-level values assigned earlier).
answer = get_openai_response(user_input, data)

# Print the generated answer, prefixed with "AI: "
print("AI: " + answer)

AI: You can use various CAD software tools to design a back plate for a Ring Doorbell. Some popular options include:

1. Fusion 360: This is a professional-grade CAD software that offers a wide range of features and allows for precise design adjustments. It provides parametric modeling capabilities, making it easier to iterate and tweak your design.

2. SolidWorks: Another powerful CAD software widely used in the industry. SolidWorks offers advanced modeling tools and allows for easy modification of dimensions and shapes.

3. Onshape: This is a cloud-based CAD software that allows for collaborative design and easy access from different devices. It offers a range of design tools and allows for iterative design changes.

4. FreeCAD: If you are looking for a free and open-source option, FreeCAD is a good choice. It provides a comprehensive set of tools for 3D modeling and allows for design modifications.

Remember, it's important to choose a CAD software that suits your needs and preferen