In [1]:
pip install requests
pip install beautifulsoup4
pip install opencv-python
pip install numpy
pip install tensorflow
pip install pillow

SyntaxError: invalid syntax (4232109895.py, line 1)

In [None]:
import os
import requests
from bs4 import BeautifulSoup
import urllib.parse

def fetch_image_urls(query, max_links_to_fetch, headers):
    """Collect image ``src`` values from a Google Images search results page.

    Args:
        query: Search phrase. It is passed through ``params`` so that it is
            URL-encoded; characters such as ``&`` or ``#`` can no longer
            corrupt the query string.
        max_links_to_fetch: Upper bound on the number of URLs returned.
            A value of zero or less yields an empty list without any request.
        headers: HTTP headers forwarded to ``requests.get``.

    Returns:
        list[str]: ``src`` attribute values of ``<img class="rg_i">`` elements
        in document order, at most ``max_links_to_fetch`` entries.

    Raises:
        requests.HTTPError: The search page answered with a non-success status.
        requests.RequestException: The request failed or timed out.
    """
    if max_links_to_fetch <= 0:
        return []

    response = requests.get(
        "https://www.google.com/search",
        params={"q": query, "tbm": "isch"},
        headers=headers,
        timeout=10,  # never hang the notebook on a stalled connection
    )
    # Fail loudly instead of silently parsing an error / rate-limit page.
    response.raise_for_status()

    soup = BeautifulSoup(response.text, 'html.parser')

    image_urls = []
    for img in soup.find_all('img', class_='rg_i'):
        if not img.has_attr('src'):
            continue
        image_urls.append(img['src'])
        if len(image_urls) >= max_links_to_fetch:
            break

    return image_urls

def download_images(image_urls, save_dir):
    """Download each URL into ``save_dir`` as ``image_<index>.jpg``.

    The download is best-effort: a failure for one URL is reported with
    ``print`` and the loop continues with the next URL.

    Args:
        image_urls: Iterable of URLs to fetch with ``requests.get``.
        save_dir: Target directory; created (including parents) if missing.
    """
    os.makedirs(save_dir, exist_ok=True)

    for i, url in enumerate(image_urls):
        try:
            response = requests.get(url, timeout=10)
            # Do not store HTTP error pages under a .jpg name; such files
            # cannot be decoded by the image-loading cells further down.
            response.raise_for_status()
            if not response.content:
                raise ValueError("empty response body")
            with open(os.path.join(save_dir, f'image_{i}.jpg'), 'wb') as f:
                f.write(response.content)
        except Exception as e:
            print(f"Could not download image {i}: {e}")

# Browser-like User-Agent header sent with every image search request.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36',
}

# Scrape both result pages first (boat, then yacht) ...
boat_urls, yacht_urls = [
    fetch_image_urls(term, max_links_to_fetch=50, headers=headers)
    for term in ('boat', 'yacht')
]

# ... then download each URL list into its own folder.
for _urls, _folder in ((boat_urls, 'boat_images'), (yacht_urls, 'yacht_images')):
    download_images(_urls, _folder)


In [None]:
import cv2
import numpy as np
from tensorflow.keras.applications import VGG16
from tensorflow.keras.applications.vgg16 import preprocess_input

def preprocess_image(image_path):
    """Load an image file and prepare it as input for VGG16.

    Args:
        image_path: Path of the image file to read.

    Returns:
        The image resized to 224x224 and passed through the VGG16
        ``preprocess_input`` function (no batch dimension added).

    Raises:
        ValueError: The file could not be read or decoded as an image.
    """
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise ValueError(f"Could not read image: {image_path}")
    # OpenCV decodes to BGR, while the VGG16 preprocess_input expects RGB
    # (it performs the RGB->BGR conversion itself). Without this step the
    # red and blue channels end up swapped.
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image = cv2.resize(image, (224, 224))
    return preprocess_input(image)

def extract_features(image_dir):
    """Compute one flattened VGG16 feature vector per image in a directory.

    Files are visited in sorted name order so that repeated runs produce the
    features in the same order. Files that cannot be preprocessed are reported
    with ``print`` and skipped instead of aborting the whole extraction.

    Args:
        image_dir: Directory whose entries are treated as image files.

    Returns:
        list: One 1-D array per successfully processed image.
    """
    model = VGG16(weights='imagenet', include_top=False)
    features = []
    for image_name in sorted(os.listdir(image_dir)):
        image_path = os.path.join(image_dir, image_name)
        try:
            image = preprocess_image(image_path)
        except Exception as e:
            print(f"Skipping {image_name}: {e}")
            continue
        batch = np.expand_dims(image, axis=0)  # add the batch dimension
        # verbose=0 keeps the notebook free of one progress bar per image.
        feature = model.predict(batch, verbose=0)
        features.append(feature.flatten())
    return features

# Per-class feature lists: boat folder first, then yacht folder.
boat_features, yacht_features = [
    extract_features(folder) for folder in ('boat_images', 'yacht_images')
]


In [None]:
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

# Assemble the feature matrix and the label vector (0: Boat, 1: Yacht)
X = np.array([*boat_features, *yacht_features])
y = np.array([0 for _ in boat_features] + [1 for _ in yacht_features])

# Hold out 20% of the samples for testing
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Train a linear SVM classifier (fit returns the fitted estimator itself)
classifier = SVC(kernel='linear', probability=True).fit(X_train, y_train)

# Evaluate on the held-out split
y_pred = classifier.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")

# Select specific images and save them
def classify_and_save(image_dir, classifier, output_dir, feature_model=None):
    """Copy the images that ``classifier`` labels as class 1 into ``output_dir``.

    Args:
        image_dir: Directory whose entries are treated as image files.
        classifier: Fitted estimator exposing ``predict``; it receives one
            flattened feature vector per image.
        output_dir: Destination directory; created (including parents) if
            missing.
        feature_model: Optional feature extractor exposing ``predict``. When
            omitted, a VGG16 network (ImageNet weights, no top) is built,
            matching the one used by ``extract_features``. Previously the body
            read a global ``model`` that no earlier cell defines at module
            level, which raised NameError on a fresh kernel.
    """
    if feature_model is None:
        feature_model = VGG16(weights='imagenet', include_top=False)

    os.makedirs(output_dir, exist_ok=True)

    for image_name in os.listdir(image_dir):
        image_path = os.path.join(image_dir, image_name)
        try:
            image = preprocess_image(image_path)
        except Exception as e:
            # One unreadable file should not abort the whole selection pass.
            print(f"Skipping {image_name}: {e}")
            continue
        image = np.expand_dims(image, axis=0)  # add the batch dimension
        feature = feature_model.predict(image, verbose=0).flatten()
        pred = classifier.predict([feature])[0]

        if pred == 1:  # keep images classified as Yacht
            output_path = os.path.join(output_dir, image_name)
            # Re-read the file so the saved copy is the full-size image,
            # not the 224x224 preprocessed array.
            cv2.imwrite(output_path, cv2.imread(image_path))

# Run the selection over both source folders, boat first and then yacht.
for _source_dir, _target_dir in (
    ('boat_images', 'selected_images/boat'),
    ('yacht_images', 'selected_images/yacht'),
):
    classify_and_save(_source_dir, classifier, _target_dir)


In [None]:
# Image size and directory configuration.
# IMG_SIZE is the side length in pixels: load_images_from_folder resizes
# every image to IMG_SIZE x IMG_SIZE.
IMG_SIZE = 128
# Source folders for the two classes (loaded below with label 0 = boat,
# label 1 = yacht).
BOAT_DIR = 'boat_images'
YACHT_DIR = 'yacht_images'

def load_images_from_folder(folder, label):
    """Read every entry of ``folder`` as an image resized to IMG_SIZE x IMG_SIZE.

    Entries that fail to load or resize are reported with ``print`` and
    left out of the result.

    Args:
        folder: Directory to scan.
        label: Value recorded once for each image that loads successfully.

    Returns:
        tuple: ``(images, labels)`` as two lists of equal length.
    """
    loaded = []
    for entry in os.listdir(folder):
        source = os.path.join(folder, entry)
        try:
            pixels = cv2.imread(source)
            loaded.append(cv2.resize(pixels, (IMG_SIZE, IMG_SIZE)))
        except Exception as err:
            print(f"Failed to process image {entry}: {err}")
    # Every loaded image carries the same label.
    return loaded, [label] * len(loaded)

# Load the boat and yacht images (label 0 = boat, label 1 = yacht)
boat_images, boat_labels = load_images_from_folder(BOAT_DIR, label=0)
yacht_images, yacht_labels = load_images_from_folder(YACHT_DIR, label=1)

# Combine both classes; pixel values are scaled to the 0-1 range as float32
X = np.array(boat_images + yacht_images).astype('float32') / 255.0
y = np.array(boat_labels + yacht_labels)

# Split the dataset: 80% training, 20% test
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [2]:
import os
import cv2
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.optimizers import Adam
import matplotlib.pyplot as plt

# Image size and directory configuration.
# IMG_SIZE is the side length in pixels: images are resized to
# IMG_SIZE x IMG_SIZE on load, and the CNN input shape below uses it too.
IMG_SIZE = 128
# Source folders for the two classes (loaded below with label 0 = boat,
# label 1 = yacht).
BOAT_DIR = 'boat_images'
YACHT_DIR = 'yacht_images'

def load_images_from_folder(folder, label):
    """Read every entry of ``folder`` as an image resized to IMG_SIZE x IMG_SIZE.

    Entries that fail to load or resize are reported with ``print`` and
    left out of the result.

    Args:
        folder: Directory to scan.
        label: Value recorded once for each image that loads successfully.

    Returns:
        tuple: ``(images, labels)`` as two lists of equal length.
    """
    loaded = []
    for entry in os.listdir(folder):
        source = os.path.join(folder, entry)
        try:
            pixels = cv2.imread(source)
            loaded.append(cv2.resize(pixels, (IMG_SIZE, IMG_SIZE)))
        except Exception as err:
            print(f"Failed to process image {entry}: {err}")
    # Every loaded image carries the same label.
    return loaded, [label] * len(loaded)

# Load the boat and yacht images (label 0 = boat, label 1 = yacht)
boat_images, boat_labels = load_images_from_folder(BOAT_DIR, label=0)
yacht_images, yacht_labels = load_images_from_folder(YACHT_DIR, label=1)

# Combine both classes; pixel values are scaled to the 0-1 range as float32
X = np.array(boat_images + yacht_images).astype('float32') / 255.0
y = np.array(boat_labels + yacht_labels)

# Split the dataset: 80% training, 20% test
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# CNN definition: three Conv2D + MaxPooling2D stages followed by a dense head
model = Sequential()
model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(IMG_SIZE, IMG_SIZE, 3)))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Conv2D(128, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(1, activation='sigmoid'))  # single sigmoid unit: binary classification

# Compile the model
model.compile(
    optimizer=Adam(),
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

# Train; 10% of the training split is held out for validation
history = model.fit(
    X_train,
    y_train,
    validation_split=0.1,
    epochs=10,
    batch_size=32,
)

# Evaluate on the test split
test_loss, test_accuracy = model.evaluate(X_test, y_test, verbose=2)
print(f"Test accuracy: {test_accuracy:.2f}")
print(f"Test loss: {test_loss:.2f}")

# Training curves: one figure per metric (accuracy first, then loss),
# each showing the training and validation series from `history`.
for _title, _y_label, _series in (
    (
        'Model Accuracy',
        'Accuracy',
        (('accuracy', 'Train Accuracy'), ('val_accuracy', 'Validation Accuracy')),
    ),
    (
        'Model Loss',
        'Loss',
        (('loss', 'Train Loss'), ('val_loss', 'Validation Loss')),
    ),
):
    for _key, _legend_label in _series:
        plt.plot(history.history[_key], label=_legend_label)
    plt.title(_title)
    plt.xlabel('Epochs')
    plt.ylabel(_y_label)
    plt.legend()
    plt.show()


ModuleNotFoundError: No module named 'cv2'