In [6]:
import os
import io
import json
import base64
from openai import OpenAI
from google.oauth2 import service_account
import googleapiclient.discovery
from datetime import datetime
from urllib.parse import urlparse, parse_qs

In [7]:
# The API key must come from the environment (or a secrets manager), never from
# the notebook source: notebooks get shared and committed, and a pasted key
# leaks with them. Any key that was previously written in this cell should be
# treated as compromised and revoked.
if not os.environ.get("OPENAI_API_KEY"):
    raise RuntimeError(
        "OPENAI_API_KEY is not set. Export it in the shell that launches "
        "Jupyter before running this notebook."
    )

# With no explicit api_key argument the client picks the key up from the
# OPENAI_API_KEY environment variable checked above.
client = OpenAI()

In [8]:
# Load the service-account key file and request access to both the Sheets and
# the Drive APIs.
credentials = service_account.Credentials.from_service_account_file(
    'Googlecred.json',
    scopes=[
        'https://www.googleapis.com/auth/spreadsheets',
        'https://www.googleapis.com/auth/drive',
    ],
)

# One API client per Google service, both authenticated with the same credentials.
drive_service = googleapiclient.discovery.build(
    'drive', 'v3', credentials=credentials
)
service = googleapiclient.discovery.build(
    'sheets', 'v4', credentials=credentials
)

In [9]:
def encode_image(image_content):
    """Return the base64 text representation of raw image bytes.

    Args:
        image_content: Bytes-like object holding the image data.

    Returns:
        str: The base64 encoding of ``image_content`` as UTF-8 text.
    """
    encoded_bytes = base64.b64encode(image_content)
    return str(encoded_bytes, 'utf-8')

In [10]:
def _extract_json_payload(reply_text):
    """Return the JSON substring of a model reply, dropping fences and prose.

    The model is asked for bare JSON but may wrap it in a Markdown code fence
    (with any casing of the language tag and any newline style) or add text
    around it. Slicing from the first opening bracket to the last matching
    closing bracket handles all of those shapes.

    Raises:
        ValueError: If the reply is empty or contains no JSON object/array.
    """
    if not reply_text or not reply_text.strip():
        raise ValueError("Model returned an empty response; no JSON to parse.")

    text = reply_text.strip()
    # Whichever of '{' or '[' appears first marks the start of the document.
    starts = [pos for pos in (text.find('{'), text.find('[')) if pos != -1]
    if not starts:
        raise ValueError("Model response does not contain JSON: %r" % text[:200])

    start = min(starts)
    closer = '}' if text[start] == '{' else ']'
    end = text.rfind(closer)
    if end < start:
        raise ValueError("Model response contains truncated JSON: %r" % text[:200])
    return text[start:end + 1]


def process_image(image_content, model='gpt-4-vision-preview', mime_type='image/jpeg'):
    """Send one label image to the vision model and return the parsed JSON reply.

    Args:
        image_content: Raw image bytes.
        model: Name of the chat-completions model to call. Defaults to the
            model this notebook was written against.
        mime_type: MIME type placed in the data URL. Defaults to JPEG; pass
            e.g. ``'image/png'`` for other formats.

    Returns:
        The decoded JSON document from the model's reply (whatever
        ``json.loads`` produces for it).

    Raises:
        ValueError: If the reply is empty or holds no JSON. Malformed JSON
            raises ``json.JSONDecodeError``, which is a ``ValueError`` subclass.
    """
    image_url = f"data:{mime_type};base64,{encode_image(image_content)}"
    response = client.chat.completions.create(
        model=model,
        messages=[
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": "Make the images straighten or correct their angles before identifying then only identify part number, part description, serial number, batch code, manufacturer, quantity(only number) and Return JSON document with data. Only return JSON not other text and be more accurate with data and check mainly for part_numbers it may look like part no., Part No: in the image for Rane Brake Lining Limited the format will be like eg=RBL/DP/019"},
                    {
                        "type": "image_url",
                        "image_url": {"url": image_url}
                    }
                ],
            }
        ],
        # temperature=0 keeps the extraction as repeatable as the model allows.
        temperature=0,
        max_tokens=500,
    )

    # The reply may be fenced or surrounded by prose; isolate the JSON first
    # instead of relying on an exact "```json\n" prefix.
    reply_text = response.choices[0].message.content
    return json.loads(_extract_json_payload(reply_text))

In [14]:
def get_folder_id_from_link(folder_link):
    """Extract the folder ID from a Google Drive folder link.

    Recognised shapes, checked in this order:

    * ``.../folders/<id>`` with optional trailing slash, extra path segments
      or query string (e.g. ``?usp=drive_link``);
    * ``...?id=<id>`` (the ID carried as a query parameter);
    * anything else: the last non-empty path component, so a bare ID passed
      in place of a link is returned unchanged.

    Args:
        folder_link: URL (or bare ID) identifying the folder.

    Returns:
        str: The extracted folder ID.

    Raises:
        ValueError: If no ID can be found in ``folder_link``.
    """
    parsed_url = urlparse(folder_link)
    # Drop empty components so leading, trailing or doubled slashes are ignored.
    path_components = [part for part in parsed_url.path.split('/') if part]

    # Preferred: the component right after "folders".
    if 'folders' in path_components:
        position = path_components.index('folders')
        if position + 1 < len(path_components):
            return path_components[position + 1]

    # Links that carry the ID in the query string instead of the path.
    query_ids = parse_qs(parsed_url.query).get('id')
    if query_ids and query_ids[0]:
        return query_ids[0]

    # Fallback: last non-empty path component.
    if path_components:
        return path_components[-1]

    raise ValueError("Invalid Google Drive folder link.")

# Share link of the Google Drive folder to process (presumably the folder that
# holds the label photos -- confirm before pointing this at another folder).
folder_link = 'https://drive.google.com/drive/folders/1ZFtPIFH2Lm9vqDgjkGtrEfMM9V70biy7?usp=drive_link'
# Resolve the link to the bare folder ID; the Drive query below filters on it.
folder_id = get_folder_id_from_link(folder_link)
print("Folder ID:", folder_id)

Folder ID: 1ZFtPIFH2Lm9vqDgjkGtrEfMM9V70biy7


In [15]:
# Collect every non-trashed file directly inside the folder. files.list returns
# results one page at a time, so keep requesting pages until the API stops
# handing back a nextPageToken; a single call would silently drop everything
# after the first page.
files = []
page_token = None
while True:
    response = drive_service.files().list(
        q=f"'{folder_id}' in parents and trashed=false",
        # nextPageToken must be requested explicitly once `fields` is set.
        fields='nextPageToken, files(id, name)',
        pageToken=page_token,
    ).execute()
    files.extend(response.get('files', []))
    page_token = response.get('nextPageToken')
    if not page_token:
        break

files

[{'id': '1EVN0wS29gVVmvvZHdmrK2Emhe1lKhuWY',
  'name': 'IMG_20240420_180251.jpg'},
 {'id': '1aZHkWCx29B5Eog7lDIvy4AR-LoVCf7yP',
  'name': 'IMG_20240420_180120.jpg'},
 {'id': '1S93TzU3miF43QNHZzFi6_hFzczd8hJr7',
  'name': 'IMG_20240420_175843.jpg'},
 {'id': '1MOFUCwRD70clq6R2MewipgUEhXGuxgLT',
  'name': 'IMG_20240420_175759.jpg'},
 {'id': '1Bo1iw6t1Tb1nuOX3FTkGxMXhryoh4aIy',
  'name': 'IMG_20240420_171722.jpg'},
 {'id': '1ezu5FhvQERo8oXZbUoTuja7ynmqA_ez7',
  'name': 'IMG_20240420_171716.jpg'},
 {'id': '1-5SiQ3Vq5af_OXvoM7fUkDCyydlB1se_',
  'name': 'IMG_20240420_171701.jpg'},
 {'id': '1XUbAYrWS6KkOHm-JUMSNd3eFG3ju5bPl',
  'name': 'IMG_20240420_171645.jpg'},
 {'id': '1-DYyvdizkjxUkh1TKZwKpz7NKHlCSygY',
  'name': 'IMG_20240420_171612.jpg'},
 {'id': '1f1foBufjYsyERB03hRU-yL4YVo2Y9LR3',
  'name': 'IMG_20240420_171557.jpg'},
 {'id': '1Sgci4t_edvbBPn9BI-T83QBjNaZOHP0Q',
  'name': 'IMG_20240420_171534.jpg'},
 {'id': '1Ue8JFiToxDyfFMQPB2VsZ6bOkaLzli5H',
  'name': 'IMG_20240420_171509.jpg'},
 {'i

In [16]:
# Destination spreadsheet and tab for the extracted label data.
SPREADSHEET_ID = '1wbytZEq3d_l0cMcTKcmqLRaZJRdROlNa-0yI1jt4wzI'
SHEET_RANGE = 'sheet1'

# Keys read from the model's JSON reply, in spreadsheet column order.
LABEL_FIELDS = (
    'part_number',
    'part_description',
    'serial_number',
    'batch_code',
    'manufacturer',
    'quantity',
)

# (filename, error message) for every image that could not be processed.
failed_files = []

# Process each file independently: download it, extract the label fields, and
# append one row to the sheet. A failure on one image is recorded and skipped,
# so it does not abort the batch after earlier rows have already been written
# (a re-run would then append those rows a second time).
for file in files:
    filename = file['name']
    file_id = file['id']
    print(filename)

    try:
        # Executing a get_media request returns the file's raw bytes, so they
        # can be passed on directly without a BytesIO round trip.
        image_content = drive_service.files().get_media(fileId=file_id).execute()

        json_data = process_image(image_content)
        if not isinstance(json_data, dict):
            raise ValueError(
                f"expected a JSON object, got {type(json_data).__name__}"
            )

        # Row layout: filename, the label fields (blank when missing), timestamp.
        row = [filename]
        row.extend(json_data.get(field, '') for field in LABEL_FIELDS)
        row.append(datetime.now().strftime('%Y-%m-%d %H:%M:%S'))
        values = [row]

        # Append immediately so rows already processed survive a later failure.
        response = service.spreadsheets().values().append(
            spreadsheetId=SPREADSHEET_ID,
            range=SHEET_RANGE,
            valueInputOption='RAW',
            body={'values': values},
        ).execute()
    except Exception as exc:  # reported below, never silently dropped
        failed_files.append((filename, str(exc)))
        print(f"  skipped {filename}: {exc}")

if failed_files:
    print(f"{len(failed_files)} file(s) could not be processed:")
    for failed_name, reason in failed_files:
        print(f"  {failed_name}: {reason}")

print("Processing and saving complete.")

IMG_20240420_180251.jpg
IMG_20240420_180120.jpg
IMG_20240420_175843.jpg
IMG_20240420_175759.jpg
IMG_20240420_171722.jpg
IMG_20240420_171716.jpg
IMG_20240420_171701.jpg
IMG_20240420_171645.jpg
IMG_20240420_171612.jpg
IMG_20240420_171557.jpg
IMG_20240420_171534.jpg
IMG_20240420_171509.jpg
IMG_20240420_171446.jpg
IMG_20240420_171439.jpg
IMG_20240420_171421.jpg
IMG_20240420_171403.jpg
IMG_20240420_171348.jpg
IMG_20240420_171333.jpg
IMG_20240420_171319.jpg
IMG_20240420_171256.jpg
IMG_20240420_171231.jpg
IMG_20240420_171210.jpg
IMG_20240420_171155.jpg
IMG_20240420_171143.jpg
IMG_20240420_171133.jpg
IMG_20240420_171059.jpg
IMG_20240420_171050.jpg
IMG_20240420_171035.jpg
IMG_20240420_170839.jpg
IMG_20240420_170811.jpg
IMG_20240420_170738.jpg
IMG_20240420_170731.jpg
IMG_20240420_170715.jpg
IMG_20240420_152003.jpg
Processing and saving complete.
